From 22b0de47ca0c25f0baf2a46112958d3d879551ed Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 18 Sep 2024 12:56:54 +0100 Subject: [PATCH 1/6] Update pre-commit --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d51ab2a..552bf95 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,3 +53,7 @@ repos: args: [--toml, pyproject.toml] additional_dependencies: - tomli + - repo: https://github.com/adamchainz/blacken-docs + rev: 1.18.0 + hooks: + - id: blacken-docs From cf896ab683255e55a315a1f1e0e460fec98b4710 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 18 Sep 2024 13:08:23 +0100 Subject: [PATCH 2/6] Run pre-commit --- contributing.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contributing.md b/contributing.md index c949e39..16fcaa6 100644 --- a/contributing.md +++ b/contributing.md @@ -1,4 +1,4 @@ -# Contributing +`# Contributing This is a quick guide on how to follow best practice and contribute smoothly to `ElementEmbeddings`. @@ -49,3 +49,4 @@ pre-commit run --all-files # optionally run hooks on all files ``` Pre-commit hooks will check all files when you commit changes, automatically fixing any files which are not formatted correctly. Those files will need to be staged again before re-attempting the commit. +` From c3ef97ca46db2a0820d1687acead96bea2c82f64 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 18 Sep 2024 13:08:39 +0100 Subject: [PATCH 3/6] Run pre-commit --- README.md | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 2595a0d..9bb7ab0 100644 --- a/README.md +++ b/README.md @@ -71,21 +71,25 @@ With -e pip will create links to the source folder so that changes to the code w For simple usage, you can instantiate an Embedding object using one of the embeddings in the [data directory](src/elementembeddings/data/element_representations/README.md). For this example, let's use the magpie elemental representation. -```python +```pycon # Import the class >>> from elementembeddings.core import Embedding # Load the magpie data ->>> magpie = Embedding.load_data('magpie') +>>> magpie = Embedding.load_data("magpie") ``` We can access some of the properties of the `Embedding` class. For example, we can find the dimensions of the elemental representation and the list of elements for which an embedding exists. -```python +```pycon # Print out some of the properties of the ElementEmbeddings class ->>> print(f'The magpie representation has embeddings of dimension {magpie.dim}') ->>> print(f'The magpie representation contains these elements: \n {magpie.element_list}') # prints out all the elements considered for this representation ->>> print(f'The magpie representation contains these features: \n {magpie.feature_labels}') # Prints out the feature labels of the chosen representation +>>> print(f"The magpie representation has embeddings of dimension {magpie.dim}") +>>> print( +... f"The magpie representation contains these elements: \n {magpie.element_list}" +... ) # prints out all the elements considered for this representation +>>> print( +... f"The magpie representation contains these features: \n {magpie.feature_labels}" +... ) # Prints out the feature labels of the chosen representation The magpie representation has embeddings of dimension 22 The magpie representation contains these elements: @@ -102,26 +106,40 @@ We can quickly generate heatmaps of distance/similarity measures between the ele from elementembeddings.plotter import heatmap_plotter, dimension_plotter import matplotlib.pyplot as plt -magpie.standardise(inplace=True) # Standardises the representation +magpie.standardise(inplace=True) # Standardises the representation -fig, ax = plt.subplots(1, 1, figsize=(6,6)) +fig, ax = plt.subplots(1, 1, figsize=(6, 6)) heatmap_params = {"vmin": -1, "vmax": 1} -heatmap_plotter(embedding=magpie, metric="cosine_similarity",show_axislabels=False,cmap="Blues_r",ax=ax, **heatmap_params) +heatmap_plotter( + embedding=magpie, + metric="cosine_similarity", + show_axislabels=False, + cmap="Blues_r", + ax=ax, + **heatmap_params +) ax.set_title("Magpie cosine similarities") fig.tight_layout() fig.show() - ``` Cosine similarity heatmap of the magpie representation ```python -fig, ax = plt.subplots(1, 1, figsize=(6,6)) - -reducer_params={"n_neighbors": 30, "random_state":42} -scatter_params = {"s":100} - -dimension_plotter(embedding=magpie, reducer="umap",n_components=2,ax=ax,adjusttext=True,reducer_params=reducer_params, scatter_params=scatter_params) +fig, ax = plt.subplots(1, 1, figsize=(6, 6)) + +reducer_params = {"n_neighbors": 30, "random_state": 42} +scatter_params = {"s": 100} + +dimension_plotter( + embedding=magpie, + reducer="umap", + n_components=2, + ax=ax, + adjusttext=True, + reducer_params=reducer_params, + scatter_params=scatter_params, +) ax.set_title("Magpie UMAP (n_neighbours=30)") ax.legend().remove() handles, labels = ax1.get_legend_handles_labels() @@ -149,7 +167,7 @@ The `composition_featuriser` function can be used to featurise the data. The com ```python from elementembeddings.composition import composition_featuriser -df_featurised = composition_featuriser(df, embedding="magpie", stats=["mean","sum"]) +df_featurised = composition_featuriser(df, embedding="magpie", stats=["mean", "sum"]) df_featurised ``` From 1439dbc6986e63e05311d2c985c3b7c9f590289b Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 18 Sep 2024 13:09:16 +0100 Subject: [PATCH 4/6] Pre-commit fixes --- docs/embeddings/element.md | 4 +- docs/tutorials.md | 183 +++++++++++++++--- .../data/element_representations/README.md | 4 +- src/elementembeddings/plotter.py | 2 +- 4 files changed, 166 insertions(+), 27 deletions(-) diff --git a/docs/embeddings/element.md b/docs/embeddings/element.md index e6a71bc..d62caaf 100644 --- a/docs/embeddings/element.md +++ b/docs/embeddings/element.md @@ -162,8 +162,8 @@ The 118 200-dimensional vectors in `random_200_new` were generated using the fol ```python import numpy as np -mu , sigma = 0 , 1 # mean and standard deviation s = np.random.normal(mu, sigma, 1000) -s = np.random.default_rng(seed=42).normal(mu, sigma, (118,200)) +mu, sigma = 0, 1 # mean and standard deviation s = np.random.normal(mu, sigma, 1000) +s = np.random.default_rng(seed=42).normal(mu, sigma, (118, 200)) ``` ### skipatom diff --git a/docs/tutorials.md b/docs/tutorials.md index d2e9462..fa7a9d4 100644 --- a/docs/tutorials.md +++ b/docs/tutorials.md @@ -8,25 +8,150 @@ For simple usage, you can instantiate an Embedding object using one of the embed ```python # Import the class ->>> from elementembeddings.core import Embedding +from elementembeddings.core import Embedding # Load the magpie data ->>> magpie = Embedding.load_data('magpie') +magpie = Embedding.load_data("magpie") ``` We can access some of the properties of the `Embedding` class. For example, we can find the dimensions of the elemental representation and the list of elements for which an embedding exists. ```python # Print out some of the properties of the ElementEmbeddings class ->>> print(f'The magpie representation has embeddings of dimension {magpie.dim}') ->>> print(f'The magpie representation contains these elements: \n {magpie.element_list}') # prints out all the elements considered for this representation ->>> print(f'The magpie representation contains these features: \n {magpie.feature_labels}') # Prints out the feature labels of the chosen representation - -The magpie representation has embeddings of dimension 22 -The magpie representation contains these elements: -['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk'] -The magpie representation contains these features: -['Number', 'MendeleevNumber', 'AtomicWeight', 'MeltingT', 'Column', 'Row', 'CovalentRadius', 'Electronegativity', 'NsValence', 'NpValence', 'NdValence', 'NfValence', 'NValence', 'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled', 'NUnfilled', 'GSvolume_pa', 'GSbandgap', 'GSmagmom', 'SpaceGroupNumber'] +print(f"The magpie representation has embeddings of dimension {magpie.dim}") +print( + f"The magpie representation contains these elements: \n {magpie.element_list}" +) # prints out all the elements considered for this representation +print( + f"The magpie representation contains these features: \n {magpie.feature_labels}" +) # Prints out the feature labels of the chosen representation + +# The magpie representation has embeddings of dimension 22 +# The magpie representation contains these elements: +[ + "H", + "He", + "Li", + "Be", + "B", + "C", + "N", + "O", + "F", + "Ne", + "Na", + "Mg", + "Al", + "Si", + "P", + "S", + "Cl", + "Ar", + "K", + "Ca", + "Sc", + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Zn", + "Ga", + "Ge", + "As", + "Se", + "Br", + "Kr", + "Rb", + "Sr", + "Y", + "Zr", + "Nb", + "Mo", + "Tc", + "Ru", + "Rh", + "Pd", + "Ag", + "Cd", + "In", + "Sn", + "Sb", + "Te", + "I", + "Xe", + "Cs", + "Ba", + "La", + "Ce", + "Pr", + "Nd", + "Pm", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Ho", + "Er", + "Tm", + "Yb", + "Lu", + "Hf", + "Ta", + "W", + "Re", + "Os", + "Ir", + "Pt", + "Au", + "Hg", + "Tl", + "Pb", + "Bi", + "Po", + "At", + "Rn", + "Fr", + "Ra", + "Ac", + "Th", + "Pa", + "U", + "Np", + "Pu", + "Am", + "Cm", + "Bk", +] +# The magpie representation contains these features: +[ + "Number", + "MendeleevNumber", + "AtomicWeight", + "MeltingT", + "Column", + "Row", + "CovalentRadius", + "Electronegativity", + "NsValence", + "NpValence", + "NdValence", + "NfValence", + "NValence", + "NsUnfilled", + "NpUnfilled", + "NdUnfilled", + "NfUnfilled", + "NUnfilled", + "GSvolume_pa", + "GSbandgap", + "GSmagmom", + "SpaceGroupNumber", +] ``` ### Plotting @@ -37,26 +162,40 @@ We can quickly generate heatmaps of distance/similarity measures between the ele from elementembeddings.plotter import heatmap_plotter, dimension_plotter import matplotlib.pyplot as plt -magpie.standardise(inplace=True) # Standardises the representation +magpie.standardise(inplace=True) # Standardises the representation -fig, ax = plt.subplots(1, 1, figsize=(6,6)) +fig, ax = plt.subplots(1, 1, figsize=(6, 6)) heatmap_params = {"vmin": -1, "vmax": 1} -heatmap_plotter(embedding=magpie, metric="cosine_similarity",show_axislabels=False,cmap="Blues_r",ax=ax, **heatmap_params) +heatmap_plotter( + embedding=magpie, + metric="cosine_similarity", + show_axislabels=False, + cmap="Blues_r", + ax=ax, + **heatmap_params +) ax.set_title("Magpie cosine similarities") fig.tight_layout() fig.show() - ``` ![Magpie cosine similarity heatmap](images/magpie_cosine_sim_heatmap.png) ```python -fig, ax = plt.subplots(1, 1, figsize=(6,6)) - -reducer_params={"n_neighbors": 30, "random_state":42} -scatter_params = {"s":100} - -dimension_plotter(embedding=magpie, reducer="umap",n_components=2,ax=ax,adjusttext=True,reducer_params=reducer_params, scatter_params=scatter_params) +fig, ax = plt.subplots(1, 1, figsize=(6, 6)) + +reducer_params = {"n_neighbors": 30, "random_state": 42} +scatter_params = {"s": 100} + +dimension_plotter( + embedding=magpie, + reducer="umap", + n_components=2, + ax=ax, + adjusttext=True, + reducer_params=reducer_params, + scatter_params=scatter_params, +) ax.set_title("Magpie UMAP (n_neighbours=30)") ax.legend().remove() handles, labels = ax1.get_legend_handles_labels() @@ -84,7 +223,7 @@ The `composition_featuriser` function can be used to featurise the data. The com ```python from elementembeddings.composition import composition_featuriser -df_featurised = composition_featuriser(df, embedding="magpie", stats=["mean","sum"]) +df_featurised = composition_featuriser(df, embedding="magpie", stats=["mean", "sum"]) df_featurised ``` diff --git a/src/elementembeddings/data/element_representations/README.md b/src/elementembeddings/data/element_representations/README.md index e6a71bc..d62caaf 100644 --- a/src/elementembeddings/data/element_representations/README.md +++ b/src/elementembeddings/data/element_representations/README.md @@ -162,8 +162,8 @@ The 118 200-dimensional vectors in `random_200_new` were generated using the fol ```python import numpy as np -mu , sigma = 0 , 1 # mean and standard deviation s = np.random.normal(mu, sigma, 1000) -s = np.random.default_rng(seed=42).normal(mu, sigma, (118,200)) +mu, sigma = 0, 1 # mean and standard deviation s = np.random.normal(mu, sigma, 1000) +s = np.random.default_rng(seed=42).normal(mu, sigma, (118, 200)) ``` ### skipatom diff --git a/src/elementembeddings/plotter.py b/src/elementembeddings/plotter.py index 1874ab1..dc74adf 100644 --- a/src/elementembeddings/plotter.py +++ b/src/elementembeddings/plotter.py @@ -175,7 +175,7 @@ def dimension_plotter( signs = [get_sign(charge) for _, charge in parsed_species] species_labels = [ - rf"$\mathregular{{{element}^{{{abs(charge)}{sign}}}}}}}$" + rf"$\mathregular{{{element}^{{{abs(charge)}{sign}}}}}$" for (element, charge), sign in zip(parsed_species, signs) ] From 5717095468457bdbb276d75c29da1e9264522d25 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 18 Sep 2024 13:09:46 +0100 Subject: [PATCH 5/6] Add support for non-integer oxidation states --- src/elementembeddings/tests/test_utils.py | 3 +++ src/elementembeddings/utils/species.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/elementembeddings/tests/test_utils.py b/src/elementembeddings/tests/test_utils.py index ed0fcc4..854863d 100644 --- a/src/elementembeddings/tests/test_utils.py +++ b/src/elementembeddings/tests/test_utils.py @@ -57,3 +57,6 @@ def test_parse_species(self): assert species.parse_species("Fe1-") == ("Fe", -1) assert species.parse_species("Fe+") == ("Fe", 1) assert species.parse_species("Fe-") == ("Fe", -1) + assert species.parse_species("Fe2.5+") == ("Fe", 2.5) + assert species.parse_species("Fe2.5-") == ("Fe", -2.5) + assert species.parse_species("Fe2.555+") == ("Fe", 2.555) diff --git a/src/elementembeddings/utils/species.py b/src/elementembeddings/utils/species.py index 99d520d..9960bbd 100644 --- a/src/elementembeddings/utils/species.py +++ b/src/elementembeddings/utils/species.py @@ -34,8 +34,8 @@ def _parse_species_old(species: str) -> tuple[str, int]: """ ele = re.match(r"[A-Za-z]+", species).group(0) - charge_match = re.search(r"\d+", species) - ox_state = int(charge_match.group(0)) if charge_match else 0 + charge_match = re.search(r"(\d+\.\d+|\d+)", species) + ox_state = float(charge_match.group(1)) if charge_match else 0 if "-" in species: ox_state *= -1 From cb93b20d23449a7b2c279807b9efbdc44c136384 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 18 Sep 2024 13:11:31 +0100 Subject: [PATCH 6/6] Update version number --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b99c860..2789286 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ module_dir = os.path.dirname(os.path.abspath(__file__)) -VERSION = "0.6" +VERSION = "0.6.1" DESCRIPTION = "Element Embeddings" with open(os.path.join(module_dir, "README.md"), encoding="utf-8") as f: LONG_DESCRIPTION = f.read()