Document codebase #194

Merged: 11 commits, Oct 3, 2024
8 changes: 3 additions & 5 deletions .github/workflows/main.yml
@@ -60,7 +60,7 @@ jobs:
     strategy:
       matrix:
         os: [ 'ubuntu-latest' ]
-        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] # "3.13"
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ] # "3.13"
     steps:
       - name: Harden Runner
         uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1
@@ -120,15 +120,13 @@ jobs:
           environment-file: environment-dev.yml
           create-args: >-
             python=${{ matrix.python-version }}
-      - name: Conda and Mamba versions
-        run: |
-          echo "micromamba $(micromamba --version)"
+          micromamba-version: 1.5.10-0 # Pin micromamba version because of the following issue: https://github.com/mamba-org/setup-micromamba/issues/225
       - name: Install miranda
         run: |
           python -m pip install --no-deps .
       - name: Check versions
         run: |
-          conda list
+          micromamba list
           python -m pip check || true
       - name: Test with pytest
         run: |
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -67,7 +67,7 @@ repos:
     rev: v1.8.0
     hooks:
       - id: numpydoc-validation
-        exclude: ^docs/|^tests/|^templates/
+        exclude: ^docs/|^tests/|^templates/|.*\/_.*\.py$
   - repo: https://github.com/Yelp/detect-secrets
     rev: v1.5.0
     hooks:
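The new alternation also skips underscore-prefixed (private) modules. A quick sanity check of the pattern, as a sketch (pre-commit matches excludes against file paths with re.search; the paths below are illustrative):

    import re

    pattern = re.compile(r"^docs/|^tests/|^templates/|.*\/_.*\.py$")
    for path in ("docs/conf.py", "src/miranda/archive/_groupings.py", "src/miranda/convert/deh.py"):
        # True means numpydoc validation is skipped for this file.
        print(path, bool(pattern.search(path)))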
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -25,6 +25,7 @@ Bug fixes
 Internal changes
 ^^^^^^^^^^^^^^^^
 * `miranda` now has a security policy (`SECURITY.md`) for disclosing sensitive issues using secure communication channels. This has also been added to the documentation.
+* `miranda` now applies the `numpydoc` documentation style to all publicly-exposed docstrings.

 .. _changes_0.5.0:
9 changes: 0 additions & 9 deletions docs/installation.rst
@@ -105,12 +105,3 @@ The sources for miranda can be downloaded from the `Github repo`_.
 These commands should work most of the time, but if big changes are made to the repository, you might need to remove the environment and create it again.

 .. _Github repo: https://github.com/Ouranosinc/miranda
-
-Creating a Conda environment
-----------------------------
-
-To create a conda development environment including all miranda dependencies, enter the following command from within your cloned repo::
-
-    $ conda create -n my_miranda_env python=3.9 --file=environment.yml
-    $ conda activate my_miranda_env
-    $ pip install -e .[dev]
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -277,6 +277,9 @@ checks = [
 exclude = [
   # don't report on objects that match any of these regex
   '\.undocumented_method$',
+  '\.__enter__$',
+  '\.__eq__$',
+  '\.__exit__$',
   '\.__repr__$'
 ]
 override_SS05 = [
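These additions let numpydoc validation pass over common dunder methods. As illustration only (a hypothetical class, not part of miranda), the following would no longer be flagged for missing numpydoc docstrings:

    class Timer:
        """A small context-manager example."""

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            return False

        def __repr__(self) -> str:
            return "Timer()"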
1 change: 0 additions & 1 deletion src/miranda/__init__.py
@@ -37,5 +37,4 @@
     utils,
     validators,
 )
-from .data import DataBase
 from .storage import FileMeta, StorageState
34 changes: 26 additions & 8 deletions src/miranda/archive/_groupings.py
@@ -1,3 +1,5 @@
+"""Grouping functions for files based on different criteria."""
+
 from __future__ import annotations

 import logging
@@ -30,17 +32,22 @@ def group_by_length(
     size: int = 10,
     sort: bool = False,
 ) -> list[list[Path]]:
-    """Group files by an arbitrary number of file entries.
+    """
+    Group files by an arbitrary number of file entries.

     Parameters
     ----------
-    files: GeneratorType or list of str or pathlib.Path
-    size: int
-    sort: bool
+    files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
+    size : int
+        The number of files to be grouped together.
+    sort : bool
+        Sort the files before grouping.

     Returns
     -------
     list[list[pathlib.Path]]
+        Grouped files.
     """
     msg = f"Creating groups of {size} files"
     logging.info(msg)
@@ -67,15 +74,18 @@ def group_by_length(
 def group_by_deciphered_date(
     files: GeneratorType | list[str | Path],
 ) -> dict[str, list[Path]]:
-    """Find a common date and groups files based on year and month.
+    """
+    Find a common date and group files based on year and month.

     Parameters
     ----------
-    files: GeneratorType or list of str or pathlib.Path
+    files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.

     Returns
     -------
     dict[str, list[pathlib.Path]]
+        Grouped files.
     """
     logging.warning("This function doesn't work well with multi-thread processing!")
     logging.info("Creating files from deciphered dates.")
@@ -119,16 +129,20 @@ def group_by_deciphered_date(
 def group_by_size(
     files: GeneratorType | list[str | Path], size: int = 10 * GiB
 ) -> list[list[Path]]:
-    """Group files up until a desired size and save it as a grouping within a list.
+    """
+    Group files up until a desired size and save it as a grouping within a list.

     Parameters
     ----------
     files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
     size : int
+        The maximum size of the group.

     Returns
     -------
     list[list[pathlib.Path]]
+        Grouped files.
     """
     msg = f"Creating groups of files based on size not exceeding: {report_file_size(size)}."

@@ -157,16 +171,20 @@ def group_by_size(
 def group_by_subdirectories(
     files: GeneratorType | list[str | Path], within: str | Path | None = None
 ) -> dict[str, list[Path]]:
-    """Group files based on the parent folder that they are located within.
+    """
+    Group files based on the parent folder that they are located within.

     Parameters
     ----------
     files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
     within : str or pathlib.Path
+        The parent folder to group the files by.

     Returns
     -------
     dict[str, list[pathlib.Path]]
+        Grouped files.
     """
     if not within:
         within = Path.cwd()
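A minimal usage sketch for these helpers, assuming they are re-exported from miranda.archive (the data directory is illustrative):

    from pathlib import Path

    from miranda.archive import group_by_length, group_by_size, group_by_subdirectories

    files = sorted(Path("/data/netcdf").glob("*.nc"))
    batches = group_by_length(files, size=5, sort=True)  # sublists of up to 5 files
    chunks = group_by_size(files, size=2 * 1024**3)  # sublists capped near 2 GiB
    by_dir = group_by_subdirectories(files, within="/data/netcdf")  # dict keyed by subdirectory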
16 changes: 13 additions & 3 deletions src/miranda/archive/_selection.py
@@ -1,3 +1,5 @@
+"""Select files by the date on which they were last modified."""
+
 from __future__ import annotations

 import logging
@@ -20,22 +22,30 @@ def select_by_date_modified(
     day: int | None,
     *,
     suffixes: str = "nc",
-    date: datetime,
+    date: datetime.date,
 ) -> list[Path]:
-    """Select files by the date on which they were last modified.
+    """
+    Select files by the date on which they were last modified.

     Parameters
     ----------
     source : str or Path
+        The directory to search for files.
     year : int
+        The year of the date to select.
     month : int
+        The month of the date to select.
     day : int
+        The day of the date to select.
     suffixes : str
+        The file suffixes to search.
-    date : date
+    date : datetime.date
+        The date to select.

     Returns
     -------
     list of Path
+        The selected files.
     """
     if date:
         date_selected = date
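A usage sketch, assuming the function is re-exported from miranda.archive and that year, month, and day accept None when an explicit date is passed (the signature shows day: int | None):

    import datetime

    from miranda.archive import select_by_date_modified

    files = select_by_date_modified(
        "/data/netcdf",  # illustrative source directory
        None,
        None,
        None,
        suffixes="nc",
        date=datetime.date(2024, 10, 3),
    )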
71 changes: 64 additions & 7 deletions src/miranda/convert/deh.py
@@ -19,7 +19,9 @@
 __all__ = ["open_txt"]

 # CMOR-like attributes
-cmor = json.load(open(Path(__file__).parent / "data" / "deh_cf_attrs.json"))[  # noqa
+cmor = json.load(
+    Path(__file__).parent.joinpath("data").joinpath("deh_cf_attrs.json").open()
+)[  # noqa
     "variable_entry"
 ]
@@ -34,7 +36,19 @@
 def extract_daily(path: os.PathLike | str) -> tuple[dict, pd.DataFrame]:
-    """Extract data and metadata from DEH (MELCC) stream flow file."""
+    """
+    Extract data and metadata from DEH (MELCCFP) stream flow file.
+
+    Parameters
+    ----------
+    path : os.PathLike or str
+        The path to the file.
+
+    Returns
+    -------
+    tuple[dict, pd.DataFrame]
+        The metadata and the data.
+    """
     with Path(path).open("r", encoding="latin1") as fh:
         txt = fh.read()
         txt = re.sub(" +", " ", txt)
@@ -70,7 +84,23 @@ def extract_daily(path: os.PathLike | str) -> tuple[dict, pd.DataFrame]:
 def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict) -> xr.Dataset:
-    """Return CF-compliant metadata."""
+    """
+    Return CF-compliant metadata.
+
+    Parameters
+    ----------
+    meta : dict
+        The metadata dictionary.
+    data : pd.DataFrame
+        The data DataFrame.
+    cf_table : dict
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.Dataset
+        The CF-compliant dataset.
+    """
     ds = xr.Dataset()

     ds["q"] = xr.DataArray(data["Débit"], attrs=cf_table["q"])
@@ -84,7 +114,20 @@ def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict) -> xr.Dataset:
         attrs={"long_name": "drainage area", "units": "km2"},
     )

-    def parse_dms(coord):
+    def _parse_dms(coord: str) -> float:
+        """
+        Parse a degree-minute-second coordinate string.
+
+        Parameters
+        ----------
+        coord : str
+            The coordinate string.
+
+        Returns
+        -------
+        float
+            The parsed coordinate, in decimal degrees.
+        """
         deg, minutes, seconds, _ = re.split("[°'\"]", coord)
         if float(deg) > 0:
             return round(
@@ -94,15 +137,15 @@ def parse_dms(coord):
     coords = meta["coords"].split(" // ")
     ds["lat"] = xr.DataArray(
-        parse_dms(coords[0]),
+        _parse_dms(coords[0]),
         attrs={
             "standard_name": "latitude",
             "long_name": "latitude",
             "units": "decimal_degrees",
         },
     )
     ds["lon"] = xr.DataArray(
-        parse_dms(coords[1]),
+        _parse_dms(coords[1]),
         attrs={
             "standard_name": "longitude",
             "long_name": "longitude",
@@ -122,6 +165,20 @@ def parse_dms(coord):

 def open_txt(path: str | Path, cf_table: dict | None = cmor) -> xr.Dataset:
-    """Extract daily HQ meteorological data and convert to xr.DataArray with CF-Convention attributes."""
+    """
+    Extract daily HQ meteorological data and convert to xr.DataArray with CF-Convention attributes.
+
+    Parameters
+    ----------
+    path : str or Path
+        The path to the file.
+    cf_table : dict, optional
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.Dataset
+        The CF-compliant dataset.
+    """
     meta, data = extract_daily(path)
     return to_cf(meta, data, cf_table)
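The _parse_dms helper above converts degree-minute-second strings to decimal degrees. A self-contained sketch of the same arithmetic (sample coordinates are illustrative; the sign handling mirrors the diff's float(deg) > 0 branch):

    import re

    def parse_dms(coord: str) -> float:
        # Split "45°30'36\"" on the °, ', and " separators.
        deg, minutes, seconds, _ = re.split("[°'\"]", coord)
        sign = 1 if float(deg) > 0 else -1
        # 45°30'36" -> 45 + 30/60 + 36/3600 = 45.51
        return round(float(deg) + sign * (float(minutes) / 60 + float(seconds) / 3600), 6)

    print(parse_dms("45°30'36\""))  # 45.51
    print(parse_dms("-73°33'0\""))  # -73.55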
41 changes: 38 additions & 3 deletions src/miranda/convert/eccc.py
@@ -14,7 +14,16 @@


 def convert_canswe(file: str | Path, output: str | Path):
-    """Convert the CanSWE netCDF files to production-ready CF-compliant netCDFs."""
+    """
+    Convert the CanSWE netCDF files to production-ready CF-compliant netCDFs.
+
+    Parameters
+    ----------
+    file : str or Path
+        The path to the CanSWE netCDF file.
+    output : str or Path
+        The output directory.
+    """
     ds = xr.open_dataset(file)
     ds = ds.set_coords(
         [
@@ -31,14 +40,40 @@ def convert_canswe(file: str | Path, output: str | Path):
         ]
     )

-    def clean_flags(variable):
+    def clean_flags(variable: xr.DataArray) -> dict:
+        """
+        Clean the flags.
+
+        Parameters
+        ----------
+        variable : xr.DataArray
+            The flag variable.
+
+        Returns
+        -------
+        dict
+            The cleaned flags.
+        """
         values = list(map(bytes.decode, np.sort(pd.unique(variable.values.flatten()))))
         values[0] = "n"
         mean_dict = parse_desc(variable.description)
         meanings = " ".join(np.array([mean_dict[v] for v in values]))
         return dict(flag_values=values, flag_meanings=meanings)

-    def parse_desc(desc):
+    def parse_desc(desc: str) -> dict:
+        """
+        Parse the description attribute.
+
+        Parameters
+        ----------
+        desc : str
+            The description attribute.
+
+        Returns
+        -------
+        dict
+            The parsed description.
+        """
         d = dict(
             map(
                 lambda kv: (kv[0].strip(), "_".join(kv[1].replace(">", "").split())),
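For context, the lambda in parse_desc reshapes comma-separated "flag: meaning" description strings into a mapping. A standalone sketch, assuming that input format:

    def parse_desc(desc: str) -> dict:
        # "E: estimated value, M: missing" -> {"E": "estimated_value", "M": "missing"}
        return dict(
            (key.strip(), "_".join(meaning.replace(">", "").split()))
            for key, meaning in (item.split(":", 1) for item in desc.split(","))
        )

    print(parse_desc("E: estimated value, M: missing"))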