[REF] Create new stats module #273

Merged 6 commits on May 23, 2019

21 changes: 20 additions & 1 deletion docs/api.rst
@@ -171,6 +171,26 @@ API
.. _calibration_ref:


:mod:`tedana.stats`: Statistical functions
--------------------------------------------------

.. automodule:: tedana.stats
:no-members:
:no-inherited-members:

.. autosummary:: tedana.stats
:toctree: generated/
:template: function.rst

tedana.stats.get_coeffs
tedana.stats.computefeats2
tedana.stats.getfbounds

.. currentmodule:: tedana

.. _calibration_ref:


:mod:`tedana.utils`: Utility functions
--------------------------------------------------

@@ -185,7 +205,6 @@ API

tedana.utils.andb
tedana.utils.dice
-tedana.utils.getfbounds
tedana.utils.load_image
tedana.utils.make_adaptive_mask
tedana.utils.unmask
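To make the scope of the refactor concrete, here is a minimal runnable sketch of how call sites change; the arrays are random, hypothetical stand-ins for the arguments used at the real call sites in the diffs below:

import numpy as np

data = np.random.random((10, 8))   # S x T
mmix = np.random.random((8, 2))    # T x C
mask = np.ones(10, dtype=bool)     # S
n_echos = 3

# before this PR the helpers were split across tedana.model and tedana.utils:
#   from tedana import model, utils
#   betas = model.get_coeffs(data, mmix, mask)
#   fmin, fmid, fmax = utils.getfbounds(n_echos)

# after this PR they share one canonical home in the new module:
from tedana.stats import get_coeffs, getfbounds
betas = get_coeffs(data, mmix, mask)
fmin, fmid, fmax = getfbounds(n_echos)
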
3 changes: 2 additions & 1 deletion tedana/decomposition/pca.py
@@ -9,6 +9,7 @@

from tedana import model, utils, io
from tedana.decomposition._utils import eimask
+from tedana.stats import computefeats2
from tedana.selection import kundu_tedpca
from tedana.due import due, BibTeX

@@ -214,7 +215,7 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG,
comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
for i_comp in range(comp_ts.shape[1]):
temp_comp_ts = comp_ts[:, i_comp][:, None]
-comp_map = utils.unmask(model.computefeats2(data_oc, temp_comp_ts, mask), mask)
+comp_map = utils.unmask(computefeats2(data_oc, temp_comp_ts, mask), mask)
comp_maps[:, i_comp] = np.squeeze(comp_map)
io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

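The loop this hunk touches projects each component's time series onto the optimally combined data one component at a time. A self-contained sketch of that pattern; the shapes and names mirror the diff, but the data here is random:

import numpy as np
from tedana import utils
from tedana.stats import computefeats2

rng = np.random.RandomState(1)
data_oc = rng.random_sample((500, 120))   # S x T optimally combined data
comp_ts = rng.random_sample((120, 3))     # T x C component time series
mask = rng.random_sample(500) > 0.3       # S-length boolean mask

comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
for i_comp in range(comp_ts.shape[1]):
    # slice one component as a T x 1 mixing matrix
    temp_comp_ts = comp_ts[:, i_comp][:, None]
    comp_map = utils.unmask(computefeats2(data_oc, temp_comp_ts, mask), mask)
    comp_maps[:, i_comp] = np.squeeze(comp_map)
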
13 changes: 7 additions & 6 deletions tedana/io.py
@@ -10,7 +10,8 @@
from nilearn._utils import check_niimg
from nilearn.image import new_img_like

-from tedana import model, utils
+from tedana import utils
+from tedana.stats import computefeats2, get_coeffs

LGR = logging.getLogger(__name__)

@@ -42,8 +43,8 @@ def split_ts(data, mmix, mask, comptable):
"""
acc = comptable[comptable.classification == 'accepted'].index.values

-cbetas = model.get_coeffs(data - data.mean(axis=-1, keepdims=True),
-                          mmix, mask)
+cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True),
+                    mmix, mask)
betas = cbetas[mask]
if len(acc) != 0:
hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
@@ -99,7 +100,7 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''):
dmdata = mdata.T - mdata.T.mean(axis=0)

# get variance explained by retained components
-betas = model.get_coeffs(dmdata.T, mmix, mask=None)
+betas = get_coeffs(dmdata.T, mmix, mask=None)
varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
(dmdata**2.).sum()) * 100
LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))
@@ -161,7 +162,7 @@ def writefeats(data, mmix, mask, ref_img, suffix=''):
"""

# write feature versions of components
-feats = utils.unmask(model.computefeats2(data, mmix, mask), mask)
+feats = utils.unmask(computefeats2(data, mmix, mask), mask)
fname = filewrite(feats, 'feats_{0}'.format(suffix), ref_img)

return fname
@@ -220,7 +221,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img):

write_split_ts(ts, mmix, mask, comptable, ref_img, suffix='OC')

-ts_B = model.get_coeffs(ts, mmix, mask)
+ts_B = get_coeffs(ts, mmix, mask)
fout = filewrite(ts_B, 'betas_OC', ref_img)
LGR.info('Writing full ICA coefficient feature set: {}'.format(op.abspath(fout)))

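The write_split_ts change above sits inside the variance-explained computation, which reduces to an ordinary least-squares fit followed by a residual sum of squares. A minimal runnable sketch, using hypothetical random inputs in place of real data:

import numpy as np
from tedana.stats import get_coeffs

rng = np.random.RandomState(0)
data = rng.random_sample((100, 50))   # hypothetical S x T data
mmix = rng.random_sample((50, 5))     # hypothetical T x C mixing matrix

# demean along time, fit the mixing matrix, and measure what it explains
dmdata = data - data.mean(axis=-1, keepdims=True)
betas = get_coeffs(dmdata, mmix, mask=None)        # S x C betas
resid = dmdata - betas.dot(mmix.T)                 # S x T residuals
varexpl = (1 - (resid ** 2).sum() / (dmdata ** 2).sum()) * 100
print('Variance explained: {:.02f}%'.format(varexpl))
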
121 changes: 3 additions & 118 deletions tedana/model/fit.py
@@ -9,6 +9,8 @@
from scipy import stats

from tedana import io, utils
+from tedana.stats import getfbounds, computefeats2, get_coeffs


LGR = logging.getLogger(__name__)

@@ -120,7 +122,7 @@ def dependence_metrics(catd, tsoc, mmix, mask, t2s, tes, ref_img,
n_data_voxels = (t2s != 0).sum()
mu = catd.mean(axis=-1, dtype=float)
tes = np.reshape(tes, (n_echos, 1))
-fmin, _, _ = utils.getfbounds(n_echos)
+fmin, _, _ = getfbounds(n_echos)

# mask arrays
mumask = mu[t2s != 0]
@@ -402,120 +404,3 @@ def kundu_metrics(comptable, metric_maps):
comptable['d_table_score'] = d_table_rank.mean(axis=1)

return comptable


def computefeats2(data, mmix, mask, normalize=True):
"""
Converts `data` to component space using `mmix`

Parameters
----------
data : (S x T) array_like
Input data
mmix : (T [x C]) array_like
Mixing matrix for converting input data to component space, where `C`
is components and `T` is the same as in `data`
mask : (S,) array_like
Boolean mask array
normalize : bool, optional
Whether to z-score output. Default: True

Returns
-------
data_Z : (S x C) :obj:`numpy.ndarray`
Data in component space
"""
if data.ndim != 2:
raise ValueError('Parameter data should be 2d, not {0}d'.format(data.ndim))
elif mmix.ndim not in [2]:
raise ValueError('Parameter mmix should be 2d, not '
'{0}d'.format(mmix.ndim))
elif mask.ndim != 1:
raise ValueError('Parameter mask should be 1d, not {0}d'.format(mask.ndim))
elif data.shape[0] != mask.shape[0]:
raise ValueError('First dimensions (number of samples) of data ({0}) '
'and mask ({1}) do not match.'.format(data.shape[0],
mask.shape[0]))
elif data.shape[1] != mmix.shape[0]:
raise ValueError('Second dimensions (number of volumes) of data ({0}) '
'and mmix ({1}) do not match.'.format(data.shape[1],
mmix.shape[0]))

# demean masked data
data_vn = stats.zscore(data[mask], axis=-1)

# get betas of `data`~`mmix` and limit to range [-0.999, 0.999]
data_R = get_coeffs(data_vn, mmix, mask=None)
data_R[data_R < -0.999] = -0.999
data_R[data_R > 0.999] = 0.999

# R-to-Z transform
data_Z = np.arctanh(data_R)
if data_Z.ndim == 1:
data_Z = np.atleast_2d(data_Z).T

# normalize data
if normalize:
data_Zm = stats.zscore(data_Z, axis=0)
data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) /
data_Z.std(axis=0, keepdims=True))
return data_Z


def get_coeffs(data, X, mask=None, add_const=False):
"""
Performs least-squares fit of `X` against `data`

Parameters
----------
data : (S [x E] x T) array_like
Array where `S` is samples, `E` is echoes, and `T` is time
X : (T [x C]) array_like
Array where `T` is time and `C` is predictor variables
mask : (S [x E]) array_like
Boolean mask array
add_const : bool, optional
Add intercept column to `X` before fitting. Default: False

Returns
-------
betas : (S [x E] x C) :obj:`numpy.ndarray`
Array of `S` sample betas for `C` predictors
"""
if data.ndim not in [2, 3]:
raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim))
elif X.ndim not in [2]:
raise ValueError('Parameter X should be 2d, not {0}d'.format(X.ndim))
elif data.shape[-1] != X.shape[0]:
raise ValueError('Last dimension (dimension {0}) of data ({1}) does not '
'match first dimension of '
'X ({2})'.format(data.ndim, data.shape[-1], X.shape[0]))

# mask data and flip (time x samples)
if mask is not None:
if mask.ndim not in [1, 2]:
raise ValueError('Parameter mask should be 1d or 2d, not {0}d'.format(mask.ndim))
elif data.shape[0] != mask.shape[0]:
raise ValueError('First dimensions of data ({0}) and mask ({1}) do not '
'match'.format(data.shape[0], mask.shape[0]))
mdata = data[mask, :].T
else:
mdata = data.T

# coerce X to >=2d
X = np.atleast_2d(X)

if len(X) == 1:
X = X.T

if add_const: # add intercept, if specified
X = np.column_stack([X, np.ones((len(X), 1))])

betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
if add_const: # drop beta for intercept, if specified
betas = betas[:, :-1]

if mask is not None:
betas = utils.unmask(betas, mask)

return betas
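
Because both functions above move to tedana.stats verbatim, their contracts are unchanged by this PR. A small self-contained sketch of both, with random arrays standing in for real multi-echo data:

import numpy as np
from tedana.stats import computefeats2, get_coeffs

rng = np.random.RandomState(42)
n_samples, n_vols, n_comps = 1000, 200, 4
data = rng.random_sample((n_samples, n_vols))   # S x T
mmix = rng.random_sample((n_vols, n_comps))     # T x C
mask = rng.random_sample(n_samples) > 0.25      # S-length boolean mask

# least-squares betas, unmasked back to S x C with zeros outside the mask
betas = get_coeffs(data, mmix, mask)
print(betas.shape)                              # (1000, 4)

# z-scored component-space maps, returned in masked space
z_maps = computefeats2(data, mmix, mask)
print(z_maps.shape)                             # (n_in_mask, 4)

Note the asymmetry: get_coeffs un-masks its output back to the full sample dimension, while computefeats2 returns maps in masked space, which is why call sites above wrap it in utils.unmask.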
4 changes: 2 additions & 2 deletions tedana/selection/tedica.py
@@ -5,7 +5,7 @@
import numpy as np
from scipy import stats

-from tedana import utils
+from tedana.stats import getfbounds
from tedana.selection._utils import getelbow, clean_dataframe

LGR = logging.getLogger(__name__)
@@ -194,7 +194,7 @@ def kundu_selection_v2(comptable, n_echos, n_vols):
temp_comptable['variance explained'].diff() < varex_upper_p].index.values

# Compute elbows from other elbows
-f05, _, f01 = utils.getfbounds(n_echos)
+f05, _, f01 = getfbounds(n_echos)
kappas_nonsig = comptable.loc[comptable['kappa'] < f01, 'kappa']
# NOTE: Would an elbow from all Kappa values *ever* be lower than one from
# a subset of lower values?
3 changes: 2 additions & 1 deletion tedana/selection/tedpca.py
@@ -5,6 +5,7 @@
import numpy as np

from tedana import utils
+from tedana.stats import getfbounds
from tedana.selection._utils import (getelbow_cons, getelbow, clean_dataframe)

LGR = logging.getLogger(__name__)
@@ -55,7 +56,7 @@ def kundu_tedpca(comptable, n_echos, kdaw=10., rdaw=1., stabilize=False):
np.arange(len(lower_diff_varex_norm))[lower_diff_varex_norm >= varex_norm_thr][0] + 1]
varex_norm_cum = np.cumsum(comptable['normalized variance explained'])

-fmin, fmid, fmax = utils.getfbounds(n_echos)
+fmin, fmid, fmax = getfbounds(n_echos)
if int(kdaw) == -1:
lim_idx = utils.andb([comptable['kappa'] < fmid,
comptable['kappa'] > fmin]) == 2
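The selection code only ever consumes getfbounds as a source of F-statistic significance thresholds tied to the echo count (the tedica hunk above unpacks the same tuple as f05, _, f01). A small sketch of that selection pattern, with a hypothetical comptable and made-up kappa values:

import pandas as pd
from tedana.stats import getfbounds

# hypothetical component table; real ones come from the metric code above
comptable = pd.DataFrame({'kappa': [15., 40., 95.]})
fmin, fmid, fmax = getfbounds(3)   # low/mid/high thresholds for 3 echoes

# flag components whose kappa sits between the low and mid thresholds,
# the same test kundu_tedpca expresses with utils.andb(...) == 2
lim_idx = (comptable['kappa'] < fmid) & (comptable['kappa'] > fmin)
print(lim_idx.values)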