Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consolidate on pathlib module #306

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/access_nri_intake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
__version__ = _version.get_versions()["version"]

CATALOG_LOCATION = "/g/data/xp65/public/apps/access-nri-intake-catalog/catalog.yaml"
USER_CATALOG_LOCATION = Path.home() / ".access_nri_intake_catalog/catalog.yaml"
USER_CATALOG_LOCATION = str(Path.home() / ".access_nri_intake_catalog/catalog.yaml")
9 changes: 5 additions & 4 deletions src/access_nri_intake/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import re
import subprocess
import sys
from pathlib import Path
from typing import Callable


Expand Down Expand Up @@ -129,7 +130,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
rootdirs = []

for _ in range(3):
dirname = os.path.basename(root)
dirname = Path(root).name
if dirname.startswith(parentdir_prefix):
return {
"version": dirname[len(parentdir_prefix) :],
Expand All @@ -139,7 +140,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
"date": None,
}
rootdirs.append(root)
root = os.path.dirname(root) # up a level
root = Path(root).parent # up a level

if verbose:
print(
Expand Down Expand Up @@ -658,12 +659,12 @@ def get_versions():
pass

try:
root = os.path.realpath(__file__)
root = Path(__file__).resolve()
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for _ in cfg.versionfile_source.split("/"):
root = os.path.dirname(root)
root = Path(root).parent
except NameError:
return {
"version": "0+unknown",
Expand Down
5 changes: 2 additions & 3 deletions src/access_nri_intake/catalog/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

"""Manager for adding/updating intake sources in an intake-dataframe-catalog like the ACCESS-NRI catalog"""

import os
from pathlib import Path

import intake
Expand Down Expand Up @@ -106,8 +105,8 @@ def build_esm(
metadata = metadata or {}
directory = directory or ""

json_file = os.path.abspath(f"{os.path.join(directory, name)}.json")
if os.path.isfile(json_file):
json_file = (Path(directory) / f"{name}.json").absolute()
if json_file.is_file():
if not overwrite:
raise CatalogManagerError(
f"An Intake-ESM datastore already exists for {name}. To overwrite, "
Expand Down
41 changes: 19 additions & 22 deletions src/access_nri_intake/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import argparse
import datetime
import logging
import os
import re
from collections.abc import Sequence
from pathlib import Path
Expand All @@ -29,7 +28,7 @@ class MetadataCheckError(Exception):


def _parse_build_inputs(
config_yamls: list[str], build_path: str, data_base_path: str
config_yamls: list[str | Path], build_path: str | Path, data_base_path: str | Path
) -> list[tuple[str, dict]]:
"""
Parse build inputs into a list of tuples of CatalogManager methods and args to
Expand All @@ -49,7 +48,7 @@ def _parse_build_inputs(
if builder:
method = "build_esm"
config_args["builder"] = getattr(builders, builder)
config_args["directory"] = build_path
config_args["directory"] = str(build_path)
config_args["overwrite"] = True
else:
method = "load"
Expand All @@ -58,16 +57,16 @@ def _parse_build_inputs(
source_args = config_args

source_args["path"] = [
os.path.join(data_base_path, _) for _ in kwargs.pop("path")
str(Path(data_base_path) / _) for _ in kwargs.pop("path")
]
metadata_yaml = kwargs.pop("metadata_yaml")
try:
metadata = load_metadata_yaml(
os.path.join(data_base_path, metadata_yaml), EXP_JSONSCHEMA
Path(data_base_path) / metadata_yaml, EXP_JSONSCHEMA
)
except jsonschema.exceptions.ValidationError:
raise MetadataCheckError(
f"Failed to validate metadata.yaml @ {os.path.dirname(metadata_yaml)}. See traceback for details."
f"Failed to validate metadata.yaml @ {Path(metadata_yaml).parent}. See traceback for details."
)
source_args["name"] = metadata["name"]
source_args["description"] = metadata["description"]
Expand Down Expand Up @@ -203,18 +202,18 @@ def build(argv: Sequence[str] | None = None):
)

# Create the build directories
build_base_path = os.path.abspath(build_base_path)
build_path = os.path.join(build_base_path, version, "source")
metacatalog_path = os.path.join(build_base_path, version, catalog_file)
os.makedirs(build_path, exist_ok=True)
build_base_path = Path(build_base_path).absolute()
build_path = Path(build_base_path) / version / "source"
metacatalog_path = Path(build_base_path) / version / catalog_file
Path(build_path).mkdir(parents=True, exist_ok=True)

# Parse inputs to pass to CatalogManager
parsed_sources = _parse_build_inputs(config_yamls, build_path, data_base_path)
_check_build_args([parsed_source[1] for parsed_source in parsed_sources])

# Get the project storage flags
def _get_project(path):
match = re.match(r"/g/data/([^/]*)/.*", path)
match = re.match(r"/g/data/([^/]*)/.*", str(path))
return match.groups()[0] if match else None

project = set()
Expand All @@ -240,8 +239,8 @@ def _get_project(path):
cat.description = "ACCESS-NRI intake catalog"
yaml_dict = yaml.safe_load(cat.yaml())

yaml_dict["sources"]["access_nri"]["args"]["path"] = os.path.join(
build_base_path, "{{version}}", catalog_file
yaml_dict["sources"]["access_nri"]["args"]["path"] = str(
Path(build_base_path) / "{{version}}" / catalog_file
)
yaml_dict["sources"]["access_nri"]["args"]["mode"] = "r"
yaml_dict["sources"]["access_nri"]["metadata"] = {
Expand All @@ -257,7 +256,7 @@ def _get_project(path):

if update:
cat_loc = get_catalog_fp(basepath=catalog_base_path)
existing_cat = os.path.exists(cat_loc)
existing_cat = Path(cat_loc).exists()

# See if there's an existing catalog
if existing_cat:
Expand Down Expand Up @@ -305,10 +304,7 @@ def _get_project(path):
vers_str = vmin_old
else:
vers_str = f"{vmin_old}-{vmax_old}"
os.rename(
cat_loc,
os.path.join(os.path.dirname(cat_loc), f"catalog-{vers_str}.yaml"),
)
Path(cat_loc).rename(Path(cat_loc).parent / f"catalog-{vers_str}.yaml")
yaml_dict = _set_catalog_yaml_version_bounds(
yaml_dict, version, version
)
Expand All @@ -333,9 +329,10 @@ def _get_project(path):
# No existing catalog, so set min = max = current version,
# unless there are folders with the right names in the write
# directory
existing_vers = os.listdir(build_base_path)
existing_vers = [
v for v in existing_vers if re.match(CATALOG_NAME_FORMAT, v)
v.name
for v in build_base_path.iterdir()
if re.match(CATALOG_NAME_FORMAT, v.name)
]
if len(existing_vers) > 0:
yaml_dict = _set_catalog_yaml_version_bounds(
Expand Down Expand Up @@ -389,7 +386,7 @@ def metadata_validate(argv: Sequence[str] | None = None):
files = args.file

for f in files:
if os.path.isfile(f):
if Path(f).is_file():
print(f"Validating {f}... ")
try:
load_metadata_yaml(f, EXP_JSONSCHEMA)
Expand Down Expand Up @@ -423,7 +420,7 @@ def metadata_template(loc: str | Path | None = None) -> None:
"""

if loc is None:
loc = os.getcwd()
loc = Path.cwd()

argparse.ArgumentParser(description="Generate a template for metadata.yaml")

Expand Down
5 changes: 2 additions & 3 deletions src/access_nri_intake/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"""General utility functions for access-nri-intake"""

import json
import os
from importlib import resources as rsr
from pathlib import Path
from warnings import warn
Expand Down Expand Up @@ -44,7 +43,7 @@ def get_jsonschema(metadata_file: str, required: list) -> tuple[dict, dict]:
return schema, schema_required


def load_metadata_yaml(path: str, jsonschema: dict) -> dict:
def load_metadata_yaml(path: str | Path, jsonschema: dict) -> dict:
"""
Load a metadata.yaml file, leaving dates as strings, and validate against a jsonschema,
allowing for tuples as arrays
Expand Down Expand Up @@ -143,7 +142,7 @@ def get_catalog_fp(basepath=None):
if not isinstance(basepath, Path):
basepath = Path(basepath)
return basepath / "catalog.yaml"
if os.path.isfile(USER_CATALOG_LOCATION):
if Path(USER_CATALOG_LOCATION).is_file():
warn(
(
"User defined catalog found in `$HOME/.access_nri_intake_catalog`. "
Expand Down
6 changes: 3 additions & 3 deletions tests/e2e/test_end_to_end.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path

import intake
import numpy as np
Expand Down Expand Up @@ -41,14 +41,14 @@ def metacat(BASE_DIR, config_dir, v_num):
"--no_update",
]
)
cat_path = os.path.join(BASE_DIR, v_num, "metacatalog.csv")
cat_path = Path(BASE_DIR) / v_num / "metacatalog.csv"
metacat = intake.open_df_catalog(cat_path)
yield metacat


@e2e
def test_catalog_subset_exists(BASE_DIR, v_num, metacat):
    """Check that building the catalog produced the metacatalog.csv for this version."""
    # Diff artifact removed: the superseded os.path.exists assertion would
    # NameError here, since this change set drops the `os` import from the
    # test module in favour of pathlib.
    assert (Path(BASE_DIR) / v_num / "metacatalog.csv").exists()


@e2e
Expand Down
18 changes: 9 additions & 9 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_bad_metadata_validation_raises(test_data, instance, no_errs, bad_kw):
assert f"| {kw}" in err_msg, f"Can't see expected specific error for {kw}"


@mock.patch("os.path.isfile")
@mock.patch("pathlib.Path.is_file")
@pytest.mark.parametrize("isfile_return", [True, False])
@pytest.mark.parametrize(
"basepath",
Expand All @@ -225,22 +225,22 @@ def test_bad_metadata_validation_raises(test_data, instance, no_errs, bad_kw):
Path("/path/like/string"),
],
)
def test_get_catalog_fp_basepath(mock_isfile, isfile_return, basepath):
mock_isfile.return_value = isfile_return
def test_get_catalog_fp_basepath(mock_is_file, isfile_return, basepath):
mock_is_file.return_value = isfile_return
assert str(get_catalog_fp(basepath=basepath)) == "/path/like/string/catalog.yaml"


@mock.patch("pathlib.Path.is_file", return_value=True)
def test_get_catalog_fp_local(mock_is_file):
    """
    Check that we get pointed back to the user catalog when a user catalog
    file exists (Path.is_file patched to return True).
    """
    # get_catalog_fp now returns a Path, so compare via str() against the
    # (string) USER_CATALOG_LOCATION constant.
    assert str(get_catalog_fp()) == USER_CATALOG_LOCATION


@mock.patch("pathlib.Path.is_file", return_value=False)
def test_get_catalog_fp_xp65(mock_is_file):
    """
    Check that we fall back to the default xp65 catalog when no user
    catalog exists (Path.is_file patched to return False).
    """
    # Docstring fixed: the copied "user catalog" wording described the
    # companion test above; this one asserts the default CATALOG_LOCATION.
    assert str(get_catalog_fp()) == CATALOG_LOCATION
Loading