From 36c529add67a6863e633919b6b7aba0d34face48 Mon Sep 17 00:00:00 2001 From: Matthew Carbone Date: Mon, 18 Sep 2023 11:02:41 -0400 Subject: [PATCH 1/5] Update for Materials Project v2 API --- .github/workflows/ci.yml | 2 +- lightshow/__init__.py | 6 +- lightshow/_tests/conftest.py | 2 +- .../_tests/structure_files/mvc-11115/POSCAR | 20 -- lightshow/_tests/test_database.py | 4 +- lightshow/_tests/test_vasp.py | 5 +- lightshow/database.py | 227 +++--------------- pyproject.toml | 3 +- 8 files changed, 44 insertions(+), 225 deletions(-) delete mode 100644 lightshow/_tests/structure_files/mvc-11115/POSCAR diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4e42ffdc..8abc1bf6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: name: Run unit testing suite env: - PMG_API_KEY: ${{ secrets.PMG_API_KEY }} + MP_API_KEY: ${{ secrets.MP_API_KEY }} runs-on: ${{ matrix.os }} strategy: diff --git a/lightshow/__init__.py b/lightshow/__init__.py index ef4e24bd..44cc7ad6 100644 --- a/lightshow/__init__.py +++ b/lightshow/__init__.py @@ -10,15 +10,15 @@ def _get_API_key_from_environ(): - """Checks for an environment variable PMG_API_KEY. If does not exist, - returns None. + """Checks for an environment variable MP_API_KEY. If does not exist, + returns None. Note that this is now for the Materials Project v2 api. 
Returns ------- str """ - return environ.get("PMG_API_KEY", None) + return environ.get("MP_API_KEY", None) def _get_POTCAR_DIRECTORY_from_environ(): diff --git a/lightshow/_tests/conftest.py b/lightshow/_tests/conftest.py index 9f51f579..5de37259 100644 --- a/lightshow/_tests/conftest.py +++ b/lightshow/_tests/conftest.py @@ -19,7 +19,7 @@ def test_structure_names(): return [ "mp-390", - "mvc-11115", + # "mvc-11115", # No longer in v2 API "mp-1215", "mp-1840", "mp-2657", diff --git a/lightshow/_tests/structure_files/mvc-11115/POSCAR b/lightshow/_tests/structure_files/mvc-11115/POSCAR deleted file mode 100644 index e0c0e350..00000000 --- a/lightshow/_tests/structure_files/mvc-11115/POSCAR +++ /dev/null @@ -1,20 +0,0 @@ -Ti4 O8 -1.0 -5.067808 -3.010778 0.000000 -5.067808 3.010778 0.000000 -3.279109 0.000000 4.898460 -Ti O -4 8 -direct -0.242348 0.242348 0.242348 Ti -0.631036 0.099237 0.631036 Ti -0.631036 0.631036 0.099237 Ti -0.099237 0.631036 0.631036 Ti -0.870171 0.365406 0.365406 O -0.365406 0.365406 0.870171 O -0.365406 0.870171 0.365406 O -0.382261 0.382261 0.382261 O -0.865247 0.865247 0.865247 O -0.873676 0.423051 0.873676 O -0.873676 0.873676 0.423051 O -0.423051 0.873676 0.873676 O diff --git a/lightshow/_tests/test_database.py b/lightshow/_tests/test_database.py index ff8a4f39..4d282db1 100644 --- a/lightshow/_tests/test_database.py +++ b/lightshow/_tests/test_database.py @@ -31,7 +31,7 @@ def test_database_from_disk(database_from_file, test_structure_names): def test_from_materials_project(test_structure_names): try: - dat = Database.from_materials_project(test_structure_names) + dat = Database.from_materials_project(material_ids=test_structure_names) dat.initialize_supercells(9.0) assert set(dat.structures.keys()) == set(test_structure_names) assert set(dat.supercells.keys()) == set(test_structure_names) @@ -43,7 +43,7 @@ def test_from_materials_project(test_structure_names): "mpid", [ "mp-390", - "mvc-11115", + # "mvc-11115", "mp-1215", "mp-1840", 
"mp-2657", diff --git a/lightshow/_tests/test_vasp.py b/lightshow/_tests/test_vasp.py index e30acb94..8d3fd45d 100644 --- a/lightshow/_tests/test_vasp.py +++ b/lightshow/_tests/test_vasp.py @@ -12,7 +12,10 @@ @pytest.mark.parametrize( "mpid", - ["mp-390", "mvc-11115"], + [ + "mp-390", + # "mvc-11115" + ], ) def test_write(mpid, dummy_potcar_file_directory, database_from_file, tmp_path): # Load it all in diff --git a/lightshow/database.py b/lightshow/database.py index 75f367be..04a166d9 100644 --- a/lightshow/database.py +++ b/lightshow/database.py @@ -4,7 +4,6 @@ utilizing existing data the user may have on their hard drive.""" from datetime import datetime -from concurrent.futures import ThreadPoolExecutor import json import os from pathlib import Path @@ -12,8 +11,8 @@ from warnings import warn from monty.json import MSONable +from mp_api.client import MPRester from pymatgen.core.structure import Structure -from pymatgen.ext.matproj import MPRester, MPRestError from tqdm import tqdm from lightshow import _get_API_key_from_environ @@ -39,97 +38,6 @@ def _delete_common_strings(old_list_of_strings): ] -def _fetch_from_MP(job): - """Uses the provided MPID to fetch the structure data. - - Parameters - ---------- - job : list - A list containing the parameters of the function, see below. - - Notes - ----- - For each element of ``job``, we have the following, in order: - - mpr : pymatgen.ext.matproj.MPRester - Interface to the Materials Project REST API. - mpid : str - The specific mpid to pull. - metadata_keys : list of str, optional - The Materials Project metadata contains a huge amount of information. - If not ``None``, these are the only keys that are kept in the pulled - metadata object. - - Returns - ------- - dict - The structure (:class:`pymatgen.core.structure.Structure`) of interest - and the specified metadata, as well as the mpid for reference. 
- """ - - mpr = job[0] - mpid = job[1] - metadata_keys = job[2] - - try: - metadata = mpr.get_doc(mpid) - except MPRestError as error: - warn(f"MPRestError pulling mpid={mpid}, error: {error}") - return {"mpid": mpid, "structure": None, "metadata": None} - - if metadata_keys is not None: - metadata = {key: metadata[key] for key in metadata_keys} - metadata["downloaded_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - # The structure is precisely in the data pulled from get_doc: - structure = Structure.from_dict(metadata.pop("structure")) - - return {"mpid": mpid, "structure": structure, "metadata": metadata} - - -def _from_mpids_list( - mpids, api_key, metadata_keys, verbose=True, concurrent_threads=2 -): - """Makes one large API call to the Materials Project database and pulls the - relevant structural files given a list of Materials Project ID's (mpids). - - Parameters - ---------- - mpids : list of str - List of Materials Project IDs to pull. - api_key : str, optional - Materials Project API key. - metadata_keys : bool, optional - If True, will only - verbose : bool, optional - If True, will use tqdm to print a progress bar. - concurrent_threads : int, optional - The number of concurrent threads used in the ThreadPoolExecutor. - - Returns - ------- - dict - A dictionary containing the structures (with keys as the mpids) and - metadata (with the same keys). 
- """ - - with MPRester(api_key) as mpr: - jobs = [[mpr, mpid, metadata_keys] for mpid in mpids] - with ThreadPoolExecutor(max_workers=concurrent_threads) as executor: - results = list( - tqdm(executor.map(_fetch_from_MP, jobs), total=len(jobs)) - ) - - structures = { - x["mpid"]: x["structure"] for x in results if x["structure"] is not None - } - metadatas = { - x["mpid"]: x["metadata"] for x in results if x["structure"] is not None - } - - return {"structures": structures, "metadata": metadatas} - - def _get_api_key(api_key): if api_key is None: api_key = _get_API_key_from_environ() @@ -216,121 +124,48 @@ def from_files(cls, root, filename="CONTCAR", cleanup_paths=True): return kls @classmethod - def from_materials_project( - cls, - query, - query_type="mpids", - api_key=None, - metadata_keys=[ - "created_at", - "blessed_tasks", - "pseudo_potential", - "spacegroup", - "_id", - "structure", - "icsd_ids", - "e_above_hull", - "formation_energy_per_atom", - "band_gap", - "diel", - ], - verbose=True, - concurrent_threads=2, - ): + def from_materials_project(cls, **kwargs): """Constructs the :class:`.Database` object by pulling structures and - metadata directly from the Materials Project. The following query types - are allowed: - - * ``query_type == mpids``, the ``query`` argument is simply a list of - Materials Project IDs. - * ``query_type == patterns``, the ``query`` argument is a list - of patterns, e.g. ``[Ti-O, Ti-O-*, Cu-O-*]``. - * ``query_type == mp_query``, then the ``query`` argument is - just a dict that is passed directly to the ``mpr.query`` method, - where ``mpr`` is the MPRester object. + metadata directly from the Materials Project. This is a simple + passthrough method which utilizes the MPRester.materials.summary.search + API of the Materials Project v2 API. Parameters ---------- - query : list or dict - The query itself, the form of which is determined by - ``query_type``. 
- query_type : str, optional - There are three different types of allowed queries: ``mpids``, - ``patterns``, or ``mp_query``. See above for the allowed query - types. - api_key : str, optional - Materials Project API key. If None (not provided), looks for the - environment variable ``PMG_API_KEY``. - metadata_keys : list, optional - The Materials Project metadata contains a huge amount of - information. If not ``None``, these are the only keys that are kept - in the pulled metadata object. - verbose : bool, optional - If True, uses the :class:`tqdm` progress bar. Otherwise will - silence it. - - Returns - ------- - Database + **kwargs + Description Examples -------- - Construct a :class:`.Database` via directly pulling certain materials - by MPID. - .. code-block:: python + Deleted Parameters + ------------------ + mpr_query_kwargs : dict + Direct passthrough to MPRester.materials.summary.search. See + examples below. + api_key : None, optional + API key which can either be provided directly or is read from + the MP_API_KEY environment variable. + + No Longer Returned + ------------------ + Database + """ - database = Database.from_materials_project(["mp-390", "mvc-1115"]) + api_key = _get_api_key(kwargs.get("api_key")) - Construct a :class:`.Database` via pulling all materials consistent - with certain patterns. For example, to pull all binary and ternary - titanium oxide compounds: + try: + kwargs.pop("api_key") + except KeyError: + pass - .. 
code-block:: python + with MPRester(api_key) as mpr: + searched = mpr.materials.summary.search(**kwargs) - database = Database.from_materials_project( - ["Ti-O", "Ti-O-*"], query_type="patterns" - ) - """ + structures = {s.material_id.string: s.structure for s in searched} + metadata = {s.material_id.string: s.dict() for s in searched} - api_key = _get_api_key(api_key) - - # Nothing to do here - if query_type == "mpids": - mpids = query - - # Convert the patterns into a list of mpids - elif query_type == "patterns": - mpids = [] - with MPRester(api_key) as mpr: - for pattern in query: - data = mpr.get_data(pattern, prop="material_id") - mpids.extend([xx["material_id"] for xx in data]) - - # Convert the raw query itself to a list of mpids - elif query_type == "mp_query": - with MPRester(api_key) as mpr: - materials_list = mpr.query(**query) - mpids = [xx["material_id"] for xx in materials_list] - - # Otherwise we error and terminate - else: - raise ValueError(f"Unknown query {query_type}") - - # Get all of the data as a dictionary - data = _from_mpids_list( - mpids, - api_key, - metadata_keys, - verbose=verbose, - concurrent_threads=concurrent_threads, - ) - - return cls( - structures=data["structures"], - metadata=data["metadata"], - supercells=dict(), - ) + return cls(structures=structures, metadata=metadata, supercells=dict()) def initialize_supercells(self, supercell_cutoff=9.0): """Initializes the supercells from the structures pulled from the @@ -504,7 +339,7 @@ def write_unit_cells(self, root, pbar=False): for key, structure in tqdm(self._structures.items(), disable=not pbar): fname = Path(root) / key / "POSCAR" - structure.to(fmt="POSCAR", filename=fname) + structure.to(fmt="POSCAR", filename=str(fname)) def write( self, diff --git a/pyproject.toml b/pyproject.toml index bd675d4c..edb043a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,8 @@ dependencies = [ "numpy", "tqdm", "monty", - "pymatgen<=2022.5.26", + "pymatgen>=2023.9.10", + "mp-api", 
"ase" ] From c8da97b7e7bf8d404ee7742e602431064aa09552 Mon Sep 17 00:00:00 2001 From: Matthew Carbone Date: Mon, 18 Sep 2023 11:06:14 -0400 Subject: [PATCH 2/5] No longer compatible with py3.7 --- .github/workflows/ci.yml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8abc1bf6..db068b12 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macOS-latest] - python-version: [3.7, 3.8, 3.9, '3.10', 3.11] + python-version: [3.8, 3.9, '3.10', 3.11] steps: diff --git a/pyproject.toml b/pyproject.toml index edb043a0..9d5e0480 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,13 +21,12 @@ maintainers = [ ] description = "A one-stop-shop for writing computational spectroscopy input files" readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.8" license = {"file" = "LICENSE"} classifiers = [ "Development Status :: 2 - Pre-Alpha", "Natural Language :: English", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3", From 8ddb244d034e920f40281557d896c4397c1269f7 Mon Sep 17 00:00:00 2001 From: Matthew Carbone Date: Mon, 18 Sep 2023 11:15:23 -0400 Subject: [PATCH 3/5] Only py>=3.9 --- .github/workflows/ci.yml | 2 +- pyproject.toml | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index db068b12..c0e334bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macOS-latest] - python-version: [3.8, 3.9, '3.10', 3.11] + python-version: [3.9, '3.10', 3.11] steps: diff --git a/pyproject.toml b/pyproject.toml index 9d5e0480..45c32a00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ 
-21,14 +21,12 @@ maintainers = [ ] description = "A one-stop-shop for writing computational spectroscopy input files" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" license = {"file" = "LICENSE"} classifiers = [ "Development Status :: 2 - Pre-Alpha", "Natural Language :: English", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering", "Intended Audience :: Developers", From 9c197eb6cf26a802ff45308ea27dfaee76ad1e12 Mon Sep 17 00:00:00 2001 From: Matthew Carbone Date: Mon, 18 Sep 2023 11:18:27 -0400 Subject: [PATCH 4/5] Update readme with new v2 instructions --- README.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 03e33e26..bd5c5ad1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@
- ![sysfs line plot](https://raw.githubusercontent.com/AI-multimodal/Lightshow/master/docs/_static/images/lightshow.jpg) [![image](https://joss.theoj.org/papers/a9cabcd7f4b85a926a797997c6622b43/status.svg)](https://joss.theoj.org/papers/a9cabcd7f4b85a926a797997c6622b43) @@ -8,9 +7,6 @@ [![image](https://app.codacy.com/project/badge/Grade/d31a4e18672c4d71bbaafa719181c140)](https://www.codacy.com/gh/AI-multimodal/Lightshow/dashboard?utm_source=github.com&utm_medium=referral&utm_content=AI-multimodal/Lightshow&utm_campaign=Badge_Grade) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![python](https://img.shields.io/badge/-Python_3.7+-blue?logo=python&logoColor=white)](https://github.com/pre-commit/pre-commit) - - -
------------------------------------------------------------------------ @@ -57,13 +53,13 @@ To install Lightshow, simply use `pip` pip install lightshow ``` -Make sure you've set your Pymatgen legacy API key as well! +Make sure you've set your Materials Project v2 API key as well! You can find some documentation on how to query data [here](https://docs.materialsproject.org/downloading-data/using-the-api/querying-data) and how to set up your new API key [here](https://next-gen.materialsproject.org/api). ```bash -export PMG_API_KEY="your_legacy_materials_project_key" +export MP_API_KEY="your_Materials_Project_v2_API_key" ``` -(or preferably, add `PMG_API_KEY` to your bash profile). +(or preferably, add `MP_API_KEY` to your bash profile). More details can be found at our [documentation](https://ai-multimodal.github.io/Lightshow/installation.html). From a5bbf17c755e3a4a48e611315612bd67f66a728f Mon Sep 17 00:00:00 2001 From: Matthew Carbone Date: Mon, 18 Sep 2023 11:22:19 -0400 Subject: [PATCH 5/5] Update docs accordingly --- docs/source/quickstart.rst | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 21a1d7bc..e48b52af 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -37,15 +37,14 @@ Begin by importing the ``Database`` object. Databases from the Materials Project ------------------------------------ -The database is designed to be constructed via classmethods. The primary classmethod we recommend using is ``from_materials_project``. It interfaces directly with the ``MPRester`` object to make queries and pull data locally. There are three ways to do this: +The database is designed to be constructed via classmethods. The primary classmethod we recommend using is ``from_materials_project``. It interfaces directly with the ``mp_api.client.MPRester`` object to make queries and pull data locally. 
As of ``Lightshow==1.0.0``, this interfaces directly with the `Materials Project v2 API `_ and is a simple passthrough. You should type something like ``mpr.materials.summary.search?`` (check its docstring) for the options you can pass directly through to ``Database.from_materials_project``. #. Directly pull a list of Materials Project IDs .. code-block:: python database = Database.from_materials_project( - query=["mp-390", "mvc-11115"], - query_type="mpids", + material_ids=["mp-390"], api_key=API_KEY ) @@ -54,28 +53,13 @@ The database is designed to be constructed via classmethods. The primary classme .. code-block:: python database = Database.from_materials_project( - query=["Ti-O", "Ti-O-*"], - query_type="patterns", - api_key=API_KEY - ) - -#. Via direct REST query. See the appropriate Pymatgen docs `here `__ for more details. - - .. code-block:: python - - database = Database.from_materials_project( - query={ - "criteria": ..., - "properties": ..., - ... - }, - query_type="mp_query", + chemsys=["Ti-O", "Ti-O-*"], api_key=API_KEY ) .. note:: - While the `Pymatgen API Key `_ can be provided manually during the use of ``from_materials_project``, we highly recommend setting it in the environment variable ``PMG_API_KEY``. If ``api_key`` in the above arguments is not provided or is set to ``None``, **Lightshow** will check for this environment variable and use it instead. + While the Materials Project API key can be provided manually during the use of ``from_materials_project``, we highly recommend setting it in the environment variable ``MP_API_KEY``. If ``api_key`` in the above arguments is not provided or is set to ``None``, **Lightshow** will check for this environment variable and use it instead. Once the ``database`` has been built, three properties are accessible: