diff --git a/.github/workflows/pylint.yml b/.github/workflows/ruff_linting.yml
similarity index 57%
rename from .github/workflows/pylint.yml
rename to .github/workflows/ruff_linting.yml
index 110596e..b019e98 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/ruff_linting.yml
@@ -1,4 +1,4 @@
-name: Pylint
+name: Ruff linting
 
 on: [push]
 
@@ -18,8 +18,8 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install gpflow>=2.6.0 numpy pandas==1.5.3 pylint rpy2==3.4.5 scipy statsmodels
-    - name: Analysing the code with pylint
+        pip install "gpflow>=2.6.0" numpy pandas==1.5.3 rpy2==3.4.5 ruff scipy statsmodels
+    - name: Analysing the code with Ruff
       run: |
-        pylint $(git ls-files '*.py') --disable=line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring --fail-under=7
-        # continue-on-error: true
+        ruff check . --exit-zero
+      continue-on-error: true
diff --git a/README.md b/README.md
index 3117934..3f548c5 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # FCEst
 
-[![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 [![Coverage](https://img.shields.io/badge/coverage-50%25-brightgreen)](coverage.xml)
 
 `FCEst` is a package for estimating static and time-varying functional connectivity (TVFC) in Python.
diff --git a/fcest/features/__init__.py b/fcest/features/__init__.py
new file mode 100644
index 0000000..01cd64b
--- /dev/null
+++ b/fcest/features/__init__.py
@@ -0,0 +1,9 @@
+from .brain_states import BrainStatesExtractor
+from .graph_metrics import GraphMetricsExtractor
+
+__all__ = [
+    "brain_states",
+    "graph_metrics",
+    "BrainStatesExtractor",
+    "GraphMetricsExtractor",
+]
diff --git a/fcest/features/brain_states.py b/fcest/features/brain_states.py
new file mode 100644
index 0000000..6694e77
--- /dev/null
+++ b/fcest/features/brain_states.py
@@ -0,0 +1,213 @@
+import logging
+import os
+
+import numpy as np
+import pandas as pd
+from sklearn.cluster import KMeans
+
+from fcest.helpers.array_operations import reconstruct_symmetric_matrix_from_tril
+
+
+class BrainStatesExtractor:
+
+    def __init__(
+        self,
+        connectivity_metric: str,
+        num_time_series: int,
+        tvfc_estimates: np.ndarray,
+    ) -> None:
+        """
+        Class for extracting brain states from TVFC estimates.
+
+        Parameters
+        ----------
+        connectivity_metric : str
+            Connectivity metric of the TVFC estimates, e.g. 'correlation'.
+        num_time_series : int
+            Number of time series, D.
+        tvfc_estimates : np.ndarray
+            Array of shape (num_subjects, num_time_steps, num_features).
+        """
+        logging.info("Initializing BrainStatesExtractor...")
+
+        self.connectivity_metric = connectivity_metric
+        self.num_time_steps = tvfc_estimates.shape[1]
+        self.num_time_series = num_time_series
+
+        self.tvfc_estimates_tril = tvfc_estimates.reshape(-1, tvfc_estimates.shape[-1])
+
+    def extract_brain_states(
+        self,
+        num_brain_states: int,
+        brain_states_savedir: str,
+    ) -> tuple[float, pd.DataFrame, pd.DataFrame]:
+        """
+        Extract brain states from TVFC estimates.
+
+        Parameters
+        ----------
+        num_brain_states : int
+            Number of brain states (clusters) to extract.
+        brain_states_savedir : str
+            Directory to save brain states to.
+        """
+        return self.compute_basis_state(
+            all_subjects_tril_tvfc=self.tvfc_estimates_tril,
+            num_basis_states=num_brain_states,
+            num_time_series=self.num_time_series,
+            num_time_steps=self.num_time_steps,
+            brain_states_savedir=brain_states_savedir,
+        )
+
+    def compute_basis_state(
+        self,
+        all_subjects_tril_tvfc: np.ndarray,
+        num_basis_states: int,
+        num_time_series: int,
+        num_time_steps: int,
+        brain_states_savedir: str,
+    ) -> tuple[float, pd.DataFrame, pd.DataFrame]:
+        """
+        Brain states (recurring whole-brain patterns) are a way to summarize estimated TVFC.
+        Here we follow Allen et al. (2012) and use k-means clustering to find the brain states across subjects,
+        independently for each of the two runs within each of the two sessions.
+
+        Parameters
+        ----------
+        :param all_subjects_tril_tvfc:
+            Array of shape (num_subjects * N, D*(D-1)/2).
+        :param num_basis_states:
+            The number of states/clusters we want to extract.
+            Ideally this number would be determined automatically, or based on some k-means elbow analysis.
+        :param num_time_series:
+        :param num_time_steps:
+        :param brain_states_savedir:
+            Directory to save brain states to.
+        :return:
+            Tuple of (k-means inertia, brain state assignments, dwell times).
+        """
+        logging.info("Running k-means clustering...")
+        kmeans = KMeans(
+            n_clusters=num_basis_states,
+            algorithm='lloyd',
+            n_init=10,
+            verbose=0,
+        ).fit(all_subjects_tril_tvfc)
+        logging.info("Finished k-means clustering.")
+
+        cluster_centers = self._get_cluster_centroids(
+            kmeans=kmeans,
+            num_time_series=num_time_series
+        )
+
+        cluster_centers, cluster_sort_order = self._sort_cluster_centers(
+            cluster_centers=cluster_centers
+        )
+
+        # Save clusters (i.e. brain states) to file.
+        if not os.path.exists(brain_states_savedir):
+            os.makedirs(brain_states_savedir)
+        for i_cluster, cluster_centroid in enumerate(cluster_centers):
+            cluster_df = pd.DataFrame(cluster_centroid)  # (D, D)
+            cluster_df.to_csv(
+                os.path.join(
+                    brain_states_savedir,
+                    f'{self.connectivity_metric:s}_brain_state_{i_cluster:d}.csv'
+                ),
+                float_format='%.2f',
+            )
+            logging.info(f"Brain state saved in '{brain_states_savedir:s}'.")
+
+        all_subjects_brain_state_assignments_df = self._get_brain_state_assignments(
+            labels=kmeans.labels_,
+            num_time_steps=num_time_steps
+        )  # (num_subjects, N)
+
+        all_subjects_brain_state_assignments_df = all_subjects_brain_state_assignments_df.replace(
+            to_replace=cluster_sort_order,
+            value=np.arange(num_basis_states)
+        )
+
+        all_subjects_dwell_times_df = self._compute_dwell_time(
+            num_brain_states=num_basis_states,
+            brain_state_assignments=all_subjects_brain_state_assignments_df
+        )  # (num_subjects, num_brain_states)
+
+        return kmeans.inertia_, all_subjects_brain_state_assignments_df, all_subjects_dwell_times_df
+
+    def _get_cluster_centroids(self, kmeans: KMeans, num_time_series: int) -> np.ndarray:
+        """
+        Get cluster centroids (centers) from k-means clustering.
+        """
+        # Get cluster centers - these are the characteristic basis brain states.
+        cluster_centers = kmeans.cluster_centers_  # (num_clusters, num_features)
+
+        # Reconstruct correlation matrix per cluster.
+        cluster_centers = [
+            reconstruct_symmetric_matrix_from_tril(cluster_vector, num_time_series) for cluster_vector in cluster_centers
+        ]
+        cluster_centers = np.array(cluster_centers)  # (num_clusters, D, D)
+
+        return cluster_centers
+
+    def _sort_cluster_centers(self, cluster_centers: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+
+        # Re-order clusters based on high to low (descending) 'contrast' - higher-contrast states are more interesting.
+        cluster_contrasts = np.var(cluster_centers, axis=(1, 2))  # (num_clusters, )
+        cluster_sort_order = np.argsort(cluster_contrasts)[::-1]  # (num_clusters, )
+
+        cluster_centers = cluster_centers[cluster_sort_order, :, :]  # (num_clusters, D, D)
+
+        return cluster_centers, cluster_sort_order
+
+    def _get_brain_state_assignments(
+        self,
+        labels,
+        num_time_steps: int,
+    ) -> pd.DataFrame:
+        """
+        Get labels per covariance matrix, which can be used to reconstruct a brain state time series per subject.
+        Each subject is now characterized only by an assignment to one of the clusters at each time step.
+
+        Parameters
+        ----------
+        :param labels:
+            Array of shape (num_subjects * N, ).
+        :param num_time_steps:
+        :return:
+        """
+        assert len(labels) % num_time_steps == 0
+        num_subjects = int(len(labels) / num_time_steps)
+
+        labels = labels.reshape(num_subjects, num_time_steps)  # (num_subjects, N)
+
+        labels_df = pd.DataFrame(labels)
+
+        return labels_df
+
+    def _compute_dwell_time(
+        self,
+        num_brain_states: int,
+        brain_state_assignments: pd.DataFrame,
+    ) -> pd.DataFrame:
+        """
+        Compute dwell times. Dwell time is taken here as the mean run length (number of
+        consecutive time steps) spent in each brain state.
+
+        Parameters
+        ----------
+        :param num_brain_states:
+        :param brain_state_assignments:
+            DataFrame of shape (num_subjects, N).
+        :return:
+            DataFrame of shape (num_subjects, num_brain_states).
+        """
+        dwell_times = np.zeros((len(brain_state_assignments), num_brain_states))
+        for i_subject, (_, state_sequence) in enumerate(brain_state_assignments.iterrows()):
+            states = state_sequence.to_numpy()
+            # Split the state time series into runs of identical consecutive states.
+            runs = np.split(states, np.where(np.diff(states) != 0)[0] + 1)
+            for i_state in range(num_brain_states):
+                run_lengths = [len(run) for run in runs if run[0] == i_state]
+                dwell_times[i_subject, i_state] = np.mean(run_lengths) if run_lengths else 0.0
+
+        return pd.DataFrame(dwell_times)
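For reviewers, a minimal usage sketch of `BrainStatesExtractor` on synthetic inputs; the shapes, the random data, and the `brain_states` output directory are illustrative only, not part of the package:

```python
import numpy as np

from fcest.features import BrainStatesExtractor

num_subjects, num_time_steps, num_time_series = 10, 400, 4
num_features = num_time_series * (num_time_series - 1) // 2  # lower-triangular entries

# Stand-in for real TVFC estimates: random values in [-1, 1].
rng = np.random.default_rng(42)
tvfc_estimates = rng.uniform(-1.0, 1.0, size=(num_subjects, num_time_steps, num_features))

extractor = BrainStatesExtractor(
    connectivity_metric="correlation",
    num_time_series=num_time_series,
    tvfc_estimates=tvfc_estimates,
)
inertia, assignments_df, dwell_times_df = extractor.extract_brain_states(
    num_brain_states=3,
    brain_states_savedir="brain_states",  # hypothetical output directory
)
print(assignments_df.shape)  # (num_subjects, num_time_steps)
print(dwell_times_df.shape)  # (num_subjects, num_brain_states)
```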
diff --git a/fcest/features/graph_metrics.py b/fcest/features/graph_metrics.py
new file mode 100644
index 0000000..2bea09f
--- /dev/null
+++ b/fcest/features/graph_metrics.py
@@ -0,0 +1,22 @@
+import networkx as nx
+
+
+class GraphMetricsExtractor:
+    """
+    Class for extracting summary graph metrics from a graph.
+    """
+
+    def __init__(self, graph: nx.Graph):
+        self.graph = graph
+
+    def extract(self) -> dict:
+        # Diameter and average shortest path length are only defined for connected graphs.
+        is_connected = nx.is_connected(self.graph)
+        return {
+            "nodes": self.graph.number_of_nodes(),
+            "edges": self.graph.number_of_edges(),
+            "density": nx.density(self.graph),
+            "diameter": nx.diameter(self.graph) if is_connected else None,
+            "average_clustering": nx.average_clustering(self.graph),
+            "average_shortest_path_length": nx.average_shortest_path_length(self.graph) if is_connected else None,
+        }
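A quick sketch of `GraphMetricsExtractor` in use; the karate club graph here merely stands in for a real thresholded connectivity graph:

```python
import networkx as nx

from fcest.features import GraphMetricsExtractor

# Stand-in for a thresholded functional connectivity graph.
graph = nx.karate_club_graph()

metrics = GraphMetricsExtractor(graph).extract()
print(metrics["nodes"], metrics["edges"], round(metrics["density"], 3))
```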
diff --git a/fcest/helpers/array_operations.py b/fcest/helpers/array_operations.py
index 291138b..194d8ef 100644
--- a/fcest/helpers/array_operations.py
+++ b/fcest/helpers/array_operations.py
@@ -180,3 +180,48 @@ def _is_positive_definite(matrix: np.array) -> bool:
         return True
     except la.LinAlgError:
         return False
+
+
+def reconstruct_symmetric_matrix_from_tril(
+    cluster_array: np.ndarray,
+    num_time_series: int,
+    diagonal: str = 'ones',
+) -> np.ndarray:
+    """
+    Reconstruct full symmetric matrix from lower triangular values.
+
+    Parameters
+    ----------
+    :param cluster_array:
+        Array of shape (D*(D-1)/2, ) with the lower triangular values.
+    :param num_time_series:
+        Number of time series, D.
+    :param diagonal:
+        What to put on the diagonal: 'ones' (e.g. for correlation matrices) or 'zeros'.
+    :return:
+        Array of shape (D, D).
+    """
+    match diagonal:
+        case 'ones':
+            reconstructed_corr_matrix = np.ones(shape=(num_time_series, num_time_series))  # (D, D)
+        case 'zeros':
+            reconstructed_corr_matrix = np.zeros(shape=(num_time_series, num_time_series))  # (D, D)
+        case _:
+            raise ValueError(f"Diagonal option '{diagonal:s}' not recognized.")
+
+    mask = np.tri(num_time_series, dtype=bool, k=-1)  # matrix of bools
+
+    # Add lower triangular values.
+    reconstructed_corr_matrix[mask] = cluster_array
+
+    # Add upper triangular values (transpose matrix first and then re-add lower triangular values).
+    reconstructed_corr_matrix = reconstructed_corr_matrix.T
+    reconstructed_corr_matrix[mask] = cluster_array
+
+    assert _check_symmetric(reconstructed_corr_matrix)
+
+    return reconstructed_corr_matrix
+
+
+def _check_symmetric(a: np.ndarray, rtol: float = 1e-05, atol: float = 1e-08) -> bool:
+    return np.allclose(a, a.T, rtol=rtol, atol=atol)
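A worked example of `reconstruct_symmetric_matrix_from_tril` for D = 3, with arbitrary values; the boolean mask fills the lower triangle in row-major order, i.e. positions (1,0), (2,0), (2,1):

```python
import numpy as np

from fcest.helpers.array_operations import reconstruct_symmetric_matrix_from_tril

# Three lower-triangular values for a 3 x 3 correlation matrix.
tril_values = np.array([0.3, -0.2, 0.7])

matrix = reconstruct_symmetric_matrix_from_tril(tril_values, num_time_series=3)
print(matrix)
# [[ 1.   0.3 -0.2]
#  [ 0.3  1.   0.7]
#  [-0.2  0.7  1. ]]
```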
diff --git a/ruff.toml b/ruff.toml
new file mode 100644
index 0000000..7dbfd67
--- /dev/null
+++ b/ruff.toml
@@ -0,0 +1,77 @@
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+]
+
+# Same as Black.
+line-length = 88
+indent-width = 4
+
+# Assume Python 3.10.
+target-version = "py310"
+
+[lint]
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
+# McCabe complexity (`C901`) by default.
+select = ["E4", "E7", "E9", "F"]
+ignore = []
+
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+[format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
+
+# Enable auto-formatting of code examples in docstrings. Markdown,
+# reStructuredText code/literal blocks and doctests are all supported.
+#
+# This is currently disabled by default, but it is planned for this
+# to be opt-out in the future.
+docstring-code-format = false
+
+# Set the line length limit used when formatting code snippets in
+# docstrings.
+#
+# This only has an effect when the `docstring-code-format` setting is
+# enabled.
+docstring-code-line-length = "dynamic"