nilearn · kchawla-pi · Jan 13, 2020 · Dec 7, 2018 · Nov 20, 2019 · Nov 20, 2019
diff --git a/examples/02_first_level_models/plot_predictions_residuals.py b/examples/02_first_level_models/plot_predictions_residuals.py
@@ -0,0 +1,171 @@
+"""
+Predicted time series and residuals
+===================================
+
+Here we fit a First Level GLM with the `minimize_memory`-argument set to `False`.
+By doing so, the `FirstLevelModel`-object stores the residuals, which we can then inspect.
+Also, the predicted time series can be extracted, which is useful to assess the quality of the model fit.
+"""
+
+
+#########################################################################
+# Import modules
+# --------------
+from nistats.datasets import fetch_spm_auditory
+from nilearn import image
+from nilearn import masking
+import pandas as pd
+
+
+# load fMRI data
+subject_data = fetch_spm_auditory()
+fmri_img = image.concat_imgs(subject_data.func)
+
+# Make an average
+mean_img = image.mean_img(fmri_img)
+mask = masking.compute_epi_mask(mean_img)
+
+# Clean and smooth data
+fmri_img = image.clean_img(fmri_img, standardize=False)
+fmri_img = image.smooth_img(fmri_img, 5.)
+
+# load events
+events = pd.read_table(subject_data['events'])
+
+
+#########################################################################
+# Fit model
+# ---------
+# Note that `minimize_memory` is set to `False` so that `FirstLevelModel`
+# stores the residuals.
+# `signal_scaling` is set to False, so we keep the same scaling as the
+# original data in `fmri_img`.
+from nistats.first_level_model import FirstLevelModel
+
+fmri_glm = FirstLevelModel(t_r=7,
+                           drift_model='cosine',
+                           signal_scaling=False,
+                           mask_img=mask,
+                           minimize_memory=False)
+
+fmri_glm = fmri_glm.fit(fmri_img, events)
+
+
+#########################################################################
+# Calculate and plot contrast
+# ---------------------------
+from nilearn import plotting
+
+z_map = fmri_glm.compute_contrast('active - rest')
+
+plotting.plot_stat_map(z_map, bg_img=mean_img, threshold=3.1)
+
+#########################################################################
+# Extract the largest clusters
+# ----------------------------
+from nistats.reporting import get_clusters_table
+from nilearn import input_data
+
+table = get_clusters_table(z_map, stat_threshold=3.1,
+                           cluster_threshold=20).set_index('Cluster ID', drop=True)
+table.head()
+
+# get the 6 largest clusters' max x, y, and z coordinates
+coords = table.loc[range(1, 7), ['X', 'Y', 'Z']].values
+
+# extract time series from each coordinate
+masker = input_data.NiftiSpheresMasker(coords)
+real_timeseries = masker.fit_transform(fmri_img)
+predicted_timeseries = masker.fit_transform(fmri_glm.predicted[0])
+
+
+#########################################################################
+# Plot predicted and actual time series for 6 most significant clusters
+# ---------------------------------------------------------------------
+import matplotlib.pyplot as plt
+
+# colors for each of the clusters
+colors = ['blue', 'navy', 'purple', 'magenta', 'olive', 'teal']
+# plot the time series and corresponding locations
+fig1, axs1 = plt.subplots(2, 6)
+for i in range(0, 6):
+    # plotting time series
+    axs1[0, i].set_title('Cluster peak {}\n'.format(coords[i]))
+    axs1[0, i].plot(real_timeseries[:, i], c=colors[i], lw=2)
+    axs1[0, i].plot(predicted_timeseries[:, i], c='r',  ls='--', lw=2)
+    axs1[0, i].set_xlabel('Time')
+    axs1[0, i].set_ylabel('Signal intensity', labelpad=0)
+    # plotting image below the time series
+    roi_img = plotting.plot_stat_map(
+        z_map, cut_coords=[coords[i][2]], threshold=3.1, figure=fig1,
+        axes=axs1[1, i], display_mode='z', colorbar=False, bg_img=mean_img)
+    roi_img.add_markers([coords[i]], colors[i], 300)
+fig1.set_size_inches(24, 14)
+
+
+#########################################################################
+# Get residuals
+# -------------
+resid = masker.fit_transform(fmri_glm.residuals[0])
+
+
+#########################################################################
+# Plot distribution of residuals
+# ------------------------------
+# Note that residuals are not really distributed normally.
+fig2, axs2 = plt.subplots(2, 3)
+axs2 = axs2.flatten()
+for i in range(0, 6):
+    axs2[i].set_title('Cluster peak {}\n'.format(coords[i]))
+    axs2[i].hist(resid[:, i], color=colors[i])
+    print('Mean residuals: {}'.format(resid[:, i].mean()))
+
+fig2.set_size_inches(12, 7)
+fig2.tight_layout()
+
+
+#########################################################################
+# Plot R-squared
+# --------------
+# Because we stored the residuals, we can plot the R-squared: the proportion
+# of explained variance of the GLM as a whole. Note that the R-squared is markedly
+# lower deep down the brain, where there is more physiological noise and we 
+# are further away from the receive coils. However, R-Squared should be interpreted
+# with a grain of salt. The R-squared value will necessarily increase with
+# the addition of more factors (such as rest, active, drift, motion) into the GLM.
+# Additionally, we are looking at the overall fit of the model, so we are
+# unable to say whether a voxel/region has a large R-squared value because
+# the voxel/region is responsive to the experiment (such as active or rest)
+# or because the voxel/region fits the noise factors (such as drift or motion)
+# that could be present in the GLM. To isolate the influence of the experiment,
+# we can use an F-test as shown in the next section.
+plotting.plot_stat_map(fmri_glm.r_square[0],
+                       bg_img=mean_img, threshold=.1, display_mode='z', cut_coords=7)
+
+
+#########################################################################
+# Calculate and Plot F-test
+# -------------------------
+# The F-test tells you how well the GLM fits effects of interest such as 
+# the active and rest conditions together. This is different from R-squared,
+# which tells you how well the overall GLM fits the data, including active, rest
+# and all the other columns in the design matrix such as drift and motion.
+import numpy as np
+
+design_matrix = fmri_glm.design_matrices_[0]
+
+# contrast with a one for "active" and zero everywhere else
+active = np.array([1 if c == 'active' else 0 for c in design_matrix.columns])
+
+# contrast with a one for "rest" and zero everywhere else
+rest = np.array([1 if c == 'rest' else 0 for c in design_matrix.columns])
+
+effects_of_interest = np.vstack((active, rest))
+# f-test for rest and activity
+z_map_ftest = fmri_glm.compute_contrast(
+    effects_of_interest,
+    stat_type='F',
+    output_type='z_score')
+
+plotting.plot_stat_map(z_map_ftest,
+                       bg_img=mean_img, threshold=3.1, display_mode='z', cut_coords=7)
diff --git a/nistats/first_level_model.py b/nistats/first_level_model.py
@@ -18,6 +18,7 @@
 import numpy as np
 import pandas as pd
 from nibabel import Nifti1Image
+from nibabel.onetime import setattr_on_read
 
 from sklearn.base import (BaseEstimator,
                           clone,
@@ -36,6 +37,7 @@
 from .regression import (ARModel,
                          OLSModel,
                          SimpleRegressionResults,
+                         RegressionResults
                          )
 from .utils import (_basestring,
                     _check_run_tables,
@@ -117,14 +119,14 @@ def run_glm(Y, X, noise_model='ar1', bins=100, n_jobs=1, verbose=0):
     acceptable_noise_models = ['ar1', 'ols']
     if noise_model not in acceptable_noise_models:
         raise ValueError(
-            "Acceptable noise models are {0}. You provided 'noise_model={1}'".\
-                format(acceptable_noise_models, noise_model))
+            "Acceptable noise models are {0}. You provided 'noise_model={1}'".
+            format(acceptable_noise_models, noise_model))
 
     if Y.shape[0] != X.shape[0]:
         raise ValueError(
             'The number of rows of Y should match the number of rows of X.'
-            ' You provided X with shape {0} and Y with shape {1}'.\
-                format(X.shape, Y.shape))
+            ' You provided X with shape {0} and Y with shape {1}'.
+            format(X.shape, Y.shape))
 
     # Create the model
     ols_result = OLSModel(X).fit(Y)
@@ -309,6 +311,7 @@ def __init__(self, t_r=None, slice_time_ref=0., hrf_model='glover',
         else:
             raise ValueError('signal_scaling must be "False", "0", "1"'
                              ' or "(0, 1)"')
+
         self.noise_model = noise_model
         self.verbose = verbose
         self.n_jobs = n_jobs
@@ -583,6 +586,96 @@ def compute_contrast(self, contrast_def, stat_type=None,
 
         return outputs if output_type == 'all' else output
 
+    def _get_voxelwise_model_attribute(self, attribute, result_as_time_series=True):
+        """Transform RegressionResults instances within a dictionary
+        (whose keys represent the autoregressive coefficient under the 'ar1'
+        noise model or only 0.0 under 'ols' noise_model and values are the
+        RegressionResults instances) into input nifti space.
+
+        Parameters
+        ----------
+        attribute : str
+            an attribute of a RegressionResults instance.
+            possible values include: resid, norm_resid, predicted,
+            SSE, r_square, MSE.
+        result_as_time_series : bool, optional
+            whether the RegressionResult attribute has a value
+            per timepoint of the input nifti image.
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s)
+        """
+        # check if valid attribute is being accessed.
+        all_attributes = dict(vars(RegressionResults)).keys()
+        possible_attributes = [prop for prop in all_attributes if '__' not in prop]
+        if attribute not in possible_attributes:
+            msg = "attribute must be one of: {attr}".format(attr=possible_attributes)
+            raise ValueError(msg)
+
+        if self.minimize_memory:
+            raise ValueError('To access voxelwise attributes like R-squared, residuals, '
+                    'and predictions, the `FirstLevelModel`-object needs to store '
+                    'there attributes. To do so, set `minimize_memory` to `False` '
+                    'when initializing the `FirstLevelModel`-object.')
+
+        if self.labels_ is None or self.results_ is None:
+            raise ValueError('The model has not been fit yet')
+
+        output = []
+
+        for design_matrix, labels, results in zip(self.design_matrices_, self.labels_, self.results_):        
+
+            if result_as_time_series:
+                voxelwise_attribute = np.zeros((design_matrix.shape[0], len(labels)))
+            else:
+                voxelwise_attribute = np.zeros((1, len(labels)))
+
+            for label_ in results:
+                label_mask = labels == label_
+                voxelwise_attribute[:, label_mask] = getattr(results[label_], attribute)
+
+            output.append(self.masker_.inverse_transform(voxelwise_attribute))
+
+            return output
+
+    @setattr_on_read
+    def residuals(self):
+        """Transform voxelwise residuals to the same shape
+        as the input Nifti1Image(s)
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s)
+        """
+        return self._get_voxelwise_model_attribute('resid')
+
+    @setattr_on_read
+    def predicted(self):
+        """Transform voxelwise predicted values to the same shape
+        as the input Nifti1Image(s)
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s)
+        """
+        return self._get_voxelwise_model_attribute('predicted')
+
+    @setattr_on_read
+    def r_square(self):
+        """Transform voxelwise r-squared values to the same shape
+        as the input Nifti1Image(s)
+
+        Returns
+        -------
+        output : list
+            a list of Nifti1Image(s)
+        """
+        return self._get_voxelwise_model_attribute('r_square', result_as_time_series=False)
+
 
 @replace_parameters({'mask': 'mask_img'}, end_version='next')
 def first_level_models_from_bids(

diff --git a/nistats/regression.py b/nistats/regression.py
@@ -276,6 +276,7 @@ def __init__(self, theta, Y, model, wY, wresid, cov=None, dispersion=1.,
                                         dispersion, nuisance)
         self.wY = wY
         self.wresid = wresid
+        self.wdesign = model.wdesign
 
     @setattr_on_read
     def resid(self):
@@ -310,7 +311,7 @@ def predicted(self):
         """
         beta = self.theta
         # the LikelihoodModelResults has parameters named 'theta'
-        X = self.model.design
+        X = self.wdesign
         return np.dot(X, beta)
 
     @setattr_on_read
@@ -319,13 +320,20 @@ def SSE(self):
         """
         return (self.wresid ** 2).sum(0)
 
+    @setattr_on_read
+    def r_square(self):
+        """Proportion of explained variance.
+        If not from an OLS model this is "pseudo"-R2.
+        """
+        return np.var(self.predicted, 0) / np.var(self.wY, 0)
+
     @setattr_on_read
     def MSE(self):
         """ Mean square (error) """
         return self.SSE / self.df_resid
 
 
-class SimpleRegressionResults(LikelihoodModelResults):
+class SimpleRegressionResults(RegressionResults):
     """This class contains only information of the model fit necessary
     for contast computation.
 
@@ -347,41 +355,19 @@ def __init__(self, results):
         # put this as a parameter of LikelihoodModel
         self.df_resid = self.df_total - self.df_model
 
+        self.wdesign = results.model.wdesign
+
     def logL(self, Y):
         """
         The maximized log-likelihood
         """
-        raise ValueError('can not use this method for simple results')
+        raise ValueError('minimize_memory should be set to False to make residuals or predictions.')
 
-    def resid(self, Y):
+    def resid(self):
         """
         Residuals from the fit.
         """
-        return Y - self.predicted
-
-    def norm_resid(self, Y):
-        """
-        Residuals, normalized to have unit length.
-
-        Notes
-        -----
-        Is this supposed to return "stanardized residuals,"
-        residuals standardized
-        to have mean zero and approximately unit variance?
-
-        d_i = e_i / sqrt(MS_E)
-
-        Where MS_E = SSE / (n - k)
-
-        See: Montgomery and Peck 3.2.1 p. 68
-             Davidson and MacKinnon 15.2 p 662
-        """
-        return self.resid(Y) * positive_reciprocal(np.sqrt(self.dispersion))
+        raise ValueError('minimize_memory should be set to False to make residuals or predictions.')
 
-    def predicted(self):
-        """ Return linear predictor values from a design matrix.
-        """
-        beta = self.theta
-        # the LikelihoodModelResults has parameters named 'theta'
-        X = self.model.design
-        return np.dot(X, beta)
+    def norm_resid(self):
+        raise ValueError('minimize_memory should be set to False to make residuals or predictions.')