Document codebase #194

Merged: 11 commits, Oct 3, 2024
8 changes: 3 additions & 5 deletions .github/workflows/main.yml
@@ -60,7 +60,7 @@ jobs:
     strategy:
       matrix:
         os: [ 'ubuntu-latest' ]
-        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] # "3.13"
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ] # "3.13"
     steps:
       - name: Harden Runner
         uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1
@@ -120,15 +120,13 @@ jobs:
           environment-file: environment-dev.yml
           create-args: >-
             python=${{ matrix.python-version }}
-      - name: Conda and Mamba versions
-        run: |
-          echo "micromamba $(micromamba --version)"
+          micromamba-version: 1.5.10-0 # Pin micromamba version because of the following issue: https://github.com/mamba-org/setup-micromamba/issues/225
       - name: Install miranda
         run: |
           python -m pip install --no-deps .
       - name: Check versions
         run: |
-          conda list
+          micromamba list
           python -m pip check || true
       - name: Test with pytest
         run: |
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -67,7 +67,7 @@ repos:
     rev: v1.8.0
     hooks:
       - id: numpydoc-validation
-        exclude: ^docs/|^tests/|^templates/
+        exclude: ^docs/|^tests/|^templates/|.*\/_.*\.py$
   - repo: https://github.com/Yelp/detect-secrets
     rev: v1.5.0
     hooks:
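The new alternation also skips underscore-prefixed (private) modules. A quick sanity check of the pattern, as a sketch (pre-commit matches excludes against file paths with re.search; the paths below are illustrative):

    import re

    pattern = re.compile(r"^docs/|^tests/|^templates/|.*\/_.*\.py$")
    for path in ("docs/conf.py", "src/miranda/archive/_groupings.py", "src/miranda/convert/deh.py"):
        # True means numpydoc validation is skipped for this file.
        print(path, bool(pattern.search(path)))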
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -25,6 +25,7 @@ Bug fixes
 Internal changes
 ^^^^^^^^^^^^^^^^
 * `miranda` now has a security policy (`SECURITY.md`) for disclosing sensitive issues using secure communication channels. This has also been added to the documentation.
+* `miranda` now applies the `numpydoc` documentation style to all publicly-exposed docstrings.

 .. _changes_0.5.0:
9 changes: 0 additions & 9 deletions docs/installation.rst
@@ -105,12 +105,3 @@ The sources for miranda can be downloaded from the `Github repo`_.
 These commands should work most of the time, but if big changes are made to the repository, you might need to remove the environment and create it again.

 .. _Github repo: https://github.com/Ouranosinc/miranda
-
-Creating a Conda environment
-----------------------------
-
-To create a conda development environment including all miranda dependencies, enter the following command from within your cloned repo::
-
-    $ conda create -n my_miranda_env python=3.9 --file=environment.yml
-    $ conda activate my_miranda_env
-    $ pip install -e .[dev]
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -277,6 +277,9 @@ checks = [
 exclude = [
   # don't report on objects that match any of these regex
   '\.undocumented_method$',
+  '\.__enter__$',
+  '\.__eq__$',
+  '\.__exit__$',
   '\.__repr__$'
 ]
 override_SS05 = [
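These additions let numpydoc validation pass over common dunder methods. As illustration only (a hypothetical class, not part of miranda), the following would no longer be flagged for missing numpydoc docstrings:

    class Timer:
        """A small context-manager example."""

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            return False

        def __repr__(self) -> str:
            return "Timer()"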
1 change: 0 additions & 1 deletion src/miranda/__init__.py
@@ -37,5 +37,4 @@
     utils,
     validators,
 )
-from .data import DataBase
 from .storage import FileMeta, StorageState
34 changes: 26 additions & 8 deletions src/miranda/archive/_groupings.py
@@ -1,3 +1,5 @@
+"""Grouping functions for files based on different criteria."""
+
 from __future__ import annotations

 import logging
@@ -30,17 +32,22 @@ def group_by_length(
     size: int = 10,
     sort: bool = False,
 ) -> list[list[Path]]:
-    """Group files by an arbitrary number of file entries.
+    """
+    Group files by an arbitrary number of file entries.

     Parameters
     ----------
-    files: GeneratorType or list of str or pathlib.Path
-    size: int
-    sort: bool
+    files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
+    size : int
+        The number of files to be grouped together.
+    sort : bool
+        Sort the files before grouping.

     Returns
     -------
     list[list[pathlib.Path]]
+        Grouped files.
     """
     msg = f"Creating groups of {size} files"
     logging.info(msg)
@@ -67,15 +74,18 @@ def group_by_length(
 def group_by_deciphered_date(
     files: GeneratorType | list[str | Path],
 ) -> dict[str, list[Path]]:
-    """Find a common date and groups files based on year and month.
+    """
+    Find a common date and group files based on year and month.

     Parameters
     ----------
-    files: GeneratorType or list of str or pathlib.Path
+    files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.

     Returns
     -------
     dict[str, list[pathlib.Path]]
+        Grouped files.
     """
     logging.warning("This function doesn't work well with multi-thread processing!")
     logging.info("Creating files from deciphered dates.")
@@ -119,16 +129,20 @@ def group_by_deciphered_date(
 def group_by_size(
     files: GeneratorType | list[str | Path], size: int = 10 * GiB
 ) -> list[list[Path]]:
-    """Group files up until a desired size and save it as a grouping within a list.
+    """
+    Group files up until a desired size and save it as a grouping within a list.

     Parameters
     ----------
     files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
     size : int
+        The maximum size of the group.

     Returns
     -------
     list[list[pathlib.Path]]
+        Grouped files.
     """
     msg = f"Creating groups of files based on size not exceeding: {report_file_size(size)}."

@@ -157,16 +171,20 @@ def group_by_size(
 def group_by_subdirectories(
     files: GeneratorType | list[str | Path], within: str | Path | None = None
 ) -> dict[str, list[Path]]:
-    """Group files based on the parent folder that they are located within.
+    """
+    Group files based on the parent folder that they are located within.

     Parameters
     ----------
     files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
     within : str or pathlib.Path
+        The parent folder to group the files by.

     Returns
     -------
     dict[str, list[pathlib.Path]]
+        Grouped files.
     """
     if not within:
         within = Path.cwd()
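A minimal usage sketch for these helpers, assuming they are re-exported from miranda.archive (the data directory is illustrative):

    from pathlib import Path

    from miranda.archive import group_by_length, group_by_size, group_by_subdirectories

    files = sorted(Path("/data/netcdf").glob("*.nc"))
    batches = group_by_length(files, size=5, sort=True)  # sublists of up to 5 files
    chunks = group_by_size(files, size=2 * 1024**3)  # sublists capped near 2 GiB
    by_dir = group_by_subdirectories(files, within="/data/netcdf")  # dict keyed by subdirectory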
16 changes: 13 additions & 3 deletions src/miranda/archive/_selection.py
@@ -1,3 +1,5 @@
+"""Select files by the date on which they were last modified."""
+
 from __future__ import annotations

 import logging
@@ -20,22 +22,30 @@ def select_by_date_modified(
     day: int | None,
     *,
     suffixes: str = "nc",
-    date: datetime,
+    date: datetime.date,
 ) -> list[Path]:
-    """Select files by the date on which they were last modified.
+    """
+    Select files by the date on which they were last modified.

     Parameters
     ----------
     source : str or Path
+        The directory to search for files.
     year : int
+        The year of the date to select.
     month : int
+        The month of the date to select.
     day : int
+        The day of the date to select.
     suffixes : str
+        The file suffixes to search.
-    date : date
+    date : datetime.date
+        The date to select.

     Returns
     -------
     list of Path
+        The selected files.
     """
     if date:
         date_selected = date
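A usage sketch, assuming the function is re-exported from miranda.archive and that year, month, and day accept None when an explicit date is passed (the signature shows day: int | None):

    import datetime

    from miranda.archive import select_by_date_modified

    files = select_by_date_modified(
        "/data/netcdf",  # illustrative source directory
        None,
        None,
        None,
        suffixes="nc",
        date=datetime.date(2024, 10, 3),
    )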
71 changes: 64 additions & 7 deletions src/miranda/convert/deh.py
@@ -19,7 +19,9 @@
 __all__ = ["open_txt"]

 # CMOR-like attributes
-cmor = json.load(open(Path(__file__).parent / "data" / "deh_cf_attrs.json"))[  # noqa
+cmor = json.load(
+    Path(__file__).parent.joinpath("data").joinpath("deh_cf_attrs.json").open()
+)[  # noqa
     "variable_entry"
 ]
@@ -34,7 +36,19 @@
 def extract_daily(path: os.PathLike | str) -> tuple[dict, pd.DataFrame]:
-    """Extract data and metadata from DEH (MELCC) stream flow file."""
+    """
+    Extract data and metadata from DEH (MELCCFP) stream flow file.
+
+    Parameters
+    ----------
+    path : os.PathLike or str
+        The path to the file.
+
+    Returns
+    -------
+    tuple[dict, pd.DataFrame]
+        The metadata and the data.
+    """
     with Path(path).open("r", encoding="latin1") as fh:
         txt = fh.read()
         txt = re.sub(" +", " ", txt)
@@ -70,7 +84,23 @@ def extract_daily(path: os.PathLike | str) -> tuple[dict, pd.DataFrame]:
 def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict) -> xr.Dataset:
-    """Return CF-compliant metadata."""
+    """
+    Return CF-compliant metadata.
+
+    Parameters
+    ----------
+    meta : dict
+        The metadata dictionary.
+    data : pd.DataFrame
+        The data DataFrame.
+    cf_table : dict
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.Dataset
+        The CF-compliant dataset.
+    """
     ds = xr.Dataset()

     ds["q"] = xr.DataArray(data["Débit"], attrs=cf_table["q"])
@@ -84,7 +114,20 @@ def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict) -> xr.Dataset:
         attrs={"long_name": "drainage area", "units": "km2"},
     )

-    def parse_dms(coord):
+    def _parse_dms(coord: str) -> float:
+        """
+        Parse a degree-minute-second coordinate string.
+
+        Parameters
+        ----------
+        coord : str
+            The coordinate string.
+
+        Returns
+        -------
+        float
+            The parsed coordinate, in decimal degrees.
+        """
         deg, minutes, seconds, _ = re.split("[°'\"]", coord)
         if float(deg) > 0:
             return round(
@@ -94,15 +137,15 @@ def parse_dms(coord):
     coords = meta["coords"].split(" // ")
     ds["lat"] = xr.DataArray(
-        parse_dms(coords[0]),
+        _parse_dms(coords[0]),
         attrs={
             "standard_name": "latitude",
             "long_name": "latitude",
             "units": "decimal_degrees",
         },
     )
     ds["lon"] = xr.DataArray(
-        parse_dms(coords[1]),
+        _parse_dms(coords[1]),
         attrs={
             "standard_name": "longitude",
             "long_name": "longitude",
@@ -122,6 +165,20 @@ def parse_dms(coord):

 def open_txt(path: str | Path, cf_table: dict | None = cmor) -> xr.Dataset:
-    """Extract daily HQ meteorological data and convert to xr.DataArray with CF-Convention attributes."""
+    """
+    Extract daily HQ meteorological data and convert to xr.DataArray with CF-Convention attributes.
+
+    Parameters
+    ----------
+    path : str or Path
+        The path to the file.
+    cf_table : dict, optional
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.Dataset
+        The CF-compliant dataset.
+    """
     meta, data = extract_daily(path)
     return to_cf(meta, data, cf_table)
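The _parse_dms helper above converts degree-minute-second strings to decimal degrees. A self-contained sketch of the same arithmetic (sample coordinates are illustrative; the sign handling mirrors the diff's float(deg) > 0 branch):

    import re

    def parse_dms(coord: str) -> float:
        # Split "45°30'36\"" on the °, ', and " separators.
        deg, minutes, seconds, _ = re.split("[°'\"]", coord)
        sign = 1 if float(deg) > 0 else -1
        # 45°30'36" -> 45 + 30/60 + 36/3600 = 45.51
        return round(float(deg) + sign * (float(minutes) / 60 + float(seconds) / 3600), 6)

    print(parse_dms("45°30'36\""))  # 45.51
    print(parse_dms("-73°33'0\""))  # -73.55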
41 changes: 38 additions & 3 deletions src/miranda/convert/eccc.py
@@ -14,7 +14,16 @@


 def convert_canswe(file: str | Path, output: str | Path):
-    """Convert the CanSWE netCDF files to production-ready CF-compliant netCDFs."""
+    """
+    Convert the CanSWE netCDF files to production-ready CF-compliant netCDFs.
+
+    Parameters
+    ----------
+    file : str or Path
+        The path to the CanSWE netCDF file.
+    output : str or Path
+        The output directory.
+    """
     ds = xr.open_dataset(file)
     ds = ds.set_coords(
         [
@@ -31,14 +40,40 @@ def convert_canswe(file: str | Path, output: str | Path):
         ]
     )

-    def clean_flags(variable):
+    def clean_flags(variable: xr.DataArray) -> dict:
+        """
+        Clean the flags.
+
+        Parameters
+        ----------
+        variable : xr.DataArray
+            The flag variable.
+
+        Returns
+        -------
+        dict
+            The cleaned flags.
+        """
         values = list(map(bytes.decode, np.sort(pd.unique(variable.values.flatten()))))
         values[0] = "n"
         mean_dict = parse_desc(variable.description)
         meanings = " ".join(np.array([mean_dict[v] for v in values]))
         return dict(flag_values=values, flag_meanings=meanings)

-    def parse_desc(desc):
+    def parse_desc(desc: str) -> dict:
+        """
+        Parse the description attribute.
+
+        Parameters
+        ----------
+        desc : str
+            The description attribute.
+
+        Returns
+        -------
+        dict
+            The parsed description.
+        """
         d = dict(
             map(
                 lambda kv: (kv[0].strip(), "_".join(kv[1].replace(">", "").split())),
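For context, the lambda in parse_desc reshapes comma-separated "flag: meaning" description strings into a mapping. A standalone sketch, assuming that input format:

    def parse_desc(desc: str) -> dict:
        # "E: estimated value, M: missing" -> {"E": "estimated_value", "M": "missing"}
        return dict(
            (key.strip(), "_".join(meaning.replace(">", "").split()))
            for key, meaning in (item.split(":", 1) for item in desc.split(","))
        )

    print(parse_desc("E: estimated value, M: missing"))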