nilearn · kchawla-pi · Jan 13, 2020 · Dec 7, 2018 · Nov 20, 2019 · Nov 20, 2019
diff --git a/examples/02_first_level_models/plot_predictions_residuals.py b/examples/02_first_level_models/plot_predictions_residuals.py
@@ -0,0 +1,120 @@
+"""
+Predicted time series and residuals
+===================================
+
+Here we fit a First Level GLM with the `minimize_memory`-argument set to `False`.
+By doing so, the `FirstLevelModel`-object stores the residuals, which we can then inspect.
+Also, the predicted time series can be extracted, which is useful to assess the quality of the model fit.
+"""
+
+
+#########################################################################
+# Import modules
+# --------------
+from nistats.datasets import fetch_spm_auditory
+from nilearn import input_data, image
+import matplotlib.pyplot as plt
+from nilearn import plotting, masking
+from nistats.reporting import get_clusters_table
+from nistats.first_level_model import FirstLevelModel
+import pandas as pd
+
+# load fMRI data
+subject_data = fetch_spm_auditory()
+fmri_img = image.concat_imgs(subject_data.func)
+
+# Make an average
+mean_img = image.mean_img(fmri_img)
+mask = masking.compute_epi_mask(mean_img)
+
+# Clean and smooth data
+fmri_img = image.clean_img(fmri_img, standardize=False)
+fmri_img = image.smooth_img(fmri_img, 5.)
+
+# load events
+events = pd.read_table(subject_data['events'])
+
+
+#########################################################################
+# Fit model
+# ---------
+# Note that `minimize_memory` is set to `False` so that `FirstLevelModel`
+# stores the residuals.
+# `signal_scaling` is set to False, so we keep the same scaling as the
+# original data in `fmri_img`.
+
+fmri_glm = FirstLevelModel(t_r=7,
+                           drift_model='cosine',
+                           signal_scaling=False,
+                           mask_img=mask,
+                           minimize_memory=False)
+
+fmri_glm = fmri_glm.fit(fmri_img, events)
+
+
+#########################################################################
+# Calculate and plot contrast
+# ---------------------------
+z_map = fmri_glm.compute_contrast('active - rest')
+
+plotting.plot_stat_map(z_map, bg_img=mean_img, threshold=3.1)
+
+#########################################################################
+# Extract the largest clusters
+# ----------------------------
+
+table = get_clusters_table(z_map, stat_threshold=3.1,
+                           cluster_threshold=20).set_index('Cluster ID', drop=True)
+table.head()
+
+masker = input_data.NiftiSpheresMasker(table.loc[range(1, 7), ['X', 'Y', 'Z']].values)
+
+real_timeseries = masker.fit_transform(fmri_img)
+predicted_timeseries = masker.fit_transform(fmri_glm.predicted)
+
+
+#########################################################################
+# Plot predicted and actual time series for 6 most significant clusters
+# ---------------------------------------------------------------------
+for i in range(1, 7):
+    plt.subplot(2, 3, i)
+    plt.title('Cluster peak {}\n'.format(table.loc[i, ['X', 'Y', 'Z']].tolist()))
+    plt.plot(real_timeseries[:, i-1], c='k', lw=2)
+    plt.plot(predicted_timeseries[:, i-1], c='r',  ls='--', lw=2)
+    plt.xlabel('Time')
+    plt.ylabel('Signal intensity')
+
+plt.gcf().set_size_inches(12, 7)
+plt.tight_layout()
+
+#########################################################################
+# Get residuals
+# -------------
+resid = masker.fit_transform(fmri_glm.residuals)
+
+
+#########################################################################
+# Plot distribution of residuals
+# ------------------------------
+# Note that residuals are not really distributed normally.
+
+
+for i in range(1, 7):
+    plt.subplot(2, 3, i)
+    plt.title('Cluster peak {}\n'.format(table.loc[i, ['X', 'Y', 'Z']].tolist()))
+    plt.hist(resid[:, i-1])
+    print('Mean residuals: {}'.format(resid[:, i-1].mean()))
+
+plt.gcf().set_size_inches(12, 7)
+plt.tight_layout()
+
+
+#########################################################################
+# Plot R-squared
+# --------------
+# Because we stored the residuals, we can plot the R-squared: the proportion
+# of explained variance of the GLM as a whole. Note that the R-squared is markedly
+# lower deep down the brain, where there is more physiological noise and we 
+# are further away from the receive coils.
+plotting.plot_stat_map(fmri_glm.r_square, 
+                       bg_img=mean_img, threshold=.1, display_mode='z', cut_coords=7)
diff --git a/nistats/first_level_model.py b/nistats/first_level_model.py
@@ -18,6 +18,7 @@
 import numpy as np
 import pandas as pd
 from nibabel import Nifti1Image
+from nibabel.onetime import setattr_on_read
 
 from sklearn.base import (BaseEstimator,
                           clone,
@@ -117,14 +118,14 @@ def run_glm(Y, X, noise_model='ar1', bins=100, n_jobs=1, verbose=0):
     acceptable_noise_models = ['ar1', 'ols']
     if noise_model not in acceptable_noise_models:
         raise ValueError(
-            "Acceptable noise models are {0}. You provided 'noise_model={1}'".\
-                format(acceptable_noise_models, noise_model))
+            "Acceptable noise models are {0}. You provided 'noise_model={1}'".
+            format(acceptable_noise_models, noise_model))
 
     if Y.shape[0] != X.shape[0]:
         raise ValueError(
             'The number of rows of Y should match the number of rows of X.'
-            ' You provided X with shape {0} and Y with shape {1}'.\
-                format(X.shape, Y.shape))
+            ' You provided X with shape {0} and Y with shape {1}'.
+            format(X.shape, Y.shape))
 
     # Create the model
     ols_result = OLSModel(X).fit(Y)
@@ -309,6 +310,7 @@ def __init__(self, t_r=None, slice_time_ref=0., hrf_model='glover',
         else:
             raise ValueError('signal_scaling must be "False", "0", "1"'
                              ' or "(0, 1)"')
+
         self.noise_model = noise_model
         self.verbose = verbose
         self.n_jobs = n_jobs
@@ -583,6 +585,94 @@ def compute_contrast(self, contrast_def, stat_type=None,
 
         return outputs if output_type == 'all' else output
 
+    def get_voxelwise_model_attribute_(self, attribute, result_as_time_series=True):
+        """Transform RegressionResults instances within a dictionary
+        (whose keys represent the autoregressive coefficient under the 'ar1'
+        noise model or only 0.0 under 'ols' noise_model and values are the
+        RegressionResults instances) into input nifti space.
+
+        Parameters
+        ----------
+        attribute : str
+            an attribute of a RegressionResults instance
+        result_as_time_series : bool, optional
+            whether the RegressionResult attribute has a value
+            per timepoint of the input nifti image.
+
+        Returns
+        -------
+        output : list or Nifti1Image
+            a list of Nifti1Images if FirstLevelModel is fit with
+            a list of Nifti1Images, or a single Nifti1Image otherwise.
+        """
+        if self.minimize_memory:
+            raise ValueError('To access voxelwise attributes like R-squared, residuals, '
+                    'and predictions, the `FirstLevelModel`-object needs to store '
+                    'there attributes. To do so, set `minimize_memory` to `False` '
+                    'when initializing the `FirstLevelModel`-object.')
+
+        if self.labels_ is None or self.results_ is None:
+            raise ValueError('The model has not been fit yet')
+
+        output = []
+
+        for design_matrix, labels, results in zip(self.design_matrices_, self.labels_, self.results_):        
+
+            if result_as_time_series:
+                voxelwise_attribute = np.zeros((design_matrix.shape[0], len(labels)))
+            else:
+                voxelwise_attribute = np.zeros((1, len(labels)))
+
+            for label_ in results:
+                label_mask = labels == label_
+                voxelwise_attribute[:, label_mask] = getattr(results[label_], attribute)
+
+            output.append(self.masker_.inverse_transform(voxelwise_attribute))
+
+        if len(output) == 1:
+            return output[0]
+        else:
+            return output
+
+    @setattr_on_read
+    def residuals(self):
+        """Transform voxelwise residuals to the same shape
+        as the input Nifti1Image(s)
+
+        Returns
+        -------
+        output : list or Nifti1Image
+            a list of Nifti1Images if FirstLevelModel is fit with
+            a list of Nifti1Images, or a single Nifti1Image otherwise.
+        """
+        return self.get_voxelwise_model_attribute_('resid')
+
+    @setattr_on_read
+    def predicted(self):
+        """Transform voxelwise predicted values to the same shape
+        as the input Nifti1Image(s)
+
+        Returns
+        -------
+        output : list or Nifti1Image
+            a list of Nifti1Images if FirstLevelModel is fit with
+            a list of Nifti1Images, or a single Nifti1Image otherwise.
+        """
+        return self.get_voxelwise_model_attribute_('predicted')
+
+    @setattr_on_read
+    def r_square(self):
+        """Transform voxelwise r-squared values to the same shape
+        as the input Nifti1Image(s)
+
+        Returns
+        -------
+        output : list or Nifti1Image
+            a list of Nifti1Images if FirstLevelModel is fit with
+            a list of Nifti1Images, or a single Nifti1Image otherwise.
+        """
+        return self.get_voxelwise_model_attribute_('r_square', result_as_time_series=False)
+
 
 @replace_parameters({'mask': 'mask_img'}, end_version='next')
 def first_level_models_from_bids(

diff --git a/nistats/regression.py b/nistats/regression.py
@@ -61,7 +61,6 @@ class OLSModel(object):
     df_resid : scalar
         Degrees of freedom of the residuals.  Number of observations less the
         rank of the design.
-
     df_model : scalar
         Degrees of freedome of the model.  The rank of the design.
     """
@@ -276,6 +275,7 @@ def __init__(self, theta, Y, model, wY, wresid, cov=None, dispersion=1.,
                                         dispersion, nuisance)
         self.wY = wY
         self.wresid = wresid
+        self.wdesign = model.wdesign
 
     @setattr_on_read
     def resid(self):
@@ -310,7 +310,7 @@ def predicted(self):
         """
         beta = self.theta
         # the LikelihoodModelResults has parameters named 'theta'
-        X = self.model.design
+        X = self.wdesign
         return np.dot(X, beta)
 
     @setattr_on_read
@@ -319,13 +319,20 @@ def SSE(self):
         """
         return (self.wresid ** 2).sum(0)
 
+    @setattr_on_read
+    def r_square(self):
+        """Proportion of explained variance.
+        If not from an OLS model this is "pseudo"-R2.
+        """
+        return np.var(self.predicted, 0) / np.var(self.wY, 0)
+
     @setattr_on_read
     def MSE(self):
         """ Mean square (error) """
         return self.SSE / self.df_resid
 
 
-class SimpleRegressionResults(LikelihoodModelResults):
+class SimpleRegressionResults(RegressionResults):
     """This class contains only information of the model fit necessary
     for contast computation.
 
@@ -347,41 +354,19 @@ def __init__(self, results):
         # put this as a parameter of LikelihoodModel
         self.df_resid = self.df_total - self.df_model
 
+        self.wdesign = results.model.wdesign
+
     def logL(self, Y):
         """
         The maximized log-likelihood
         """
-        raise ValueError('can not use this method for simple results')
+        raise ValueError('minimize_memory should be set to False to make residuals or predictions.')
 
-    def resid(self, Y):
+    def resid(self):
         """
         Residuals from the fit.
         """
-        return Y - self.predicted
-
-    def norm_resid(self, Y):
-        """
-        Residuals, normalized to have unit length.
-
-        Notes
-        -----
-        Is this supposed to return "stanardized residuals,"
-        residuals standardized
-        to have mean zero and approximately unit variance?
+        raise ValueError('minimize_memory should be set to False to make residuals or predictions.')
 
-        d_i = e_i / sqrt(MS_E)
-
-        Where MS_E = SSE / (n - k)
-
-        See: Montgomery and Peck 3.2.1 p. 68
-             Davidson and MacKinnon 15.2 p 662
-        """
-        return self.resid(Y) * positive_reciprocal(np.sqrt(self.dispersion))
-
-    def predicted(self):
-        """ Return linear predictor values from a design matrix.
-        """
-        beta = self.theta
-        # the LikelihoodModelResults has parameters named 'theta'
-        X = self.model.design
-        return np.dot(X, beta)
+    def norm_resid(self):
+        raise ValueError('minimize_memory should be set to False to make residuals or predictions.')