From abd7716617057b4fb3cceb2aa1fb352b7f32a60e Mon Sep 17 00:00:00 2001
From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com>
Date: Wed, 25 Sep 2024 15:33:29 -0400
Subject: [PATCH 01/10] documentation first pass

---
 src/miranda/archive/_groupings.py | 34 +++++++++++++-----
 src/miranda/archive/_selection.py | 15 ++++++--
 src/miranda/utils.py              | 59 ++++++++++++++++++++++++-------
 src/miranda/validators.py         |  9 ++++-
 4 files changed, 93 insertions(+), 24 deletions(-)

diff --git a/src/miranda/archive/_groupings.py b/src/miranda/archive/_groupings.py
index c7bc1ba2..1881427e 100644
--- a/src/miranda/archive/_groupings.py
+++ b/src/miranda/archive/_groupings.py
@@ -1,3 +1,5 @@
+"""Grouping functions for files based on different criteria."""
+
 from __future__ import annotations
 
 import logging
@@ -30,17 +32,22 @@ def group_by_length(
     size: int = 10,
     sort: bool = False,
 ) -> list[list[Path]]:
-    """Group files by an arbitrary number of file entries.
+    """
+    Group files by an arbitrary number of file entries.
 
     Parameters
     ----------
-    files: GeneratorType or list of str or pathlib.Path
-    size: int
-    sort: bool
+    files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
+    size : int
+        The number of files to be grouped together.
+    sort : bool
+        Sort the files before grouping.
 
     Returns
     -------
     list[list[pathlib.Path]]
+        Grouped files.
     """
     msg = f"Creating groups of {size} files"
     logging.info(msg)
@@ -67,15 +74,18 @@ def group_by_length(
 def group_by_deciphered_date(
     files: GeneratorType | list[str | Path],
 ) -> dict[str, list[Path]]:
-    """Find a common date and groups files based on year and month.
+    """
+    Find a common date and groups files based on year and month.
 
     Parameters
     ----------
-    files: GeneratorType or list of str or pathlib.Path
+    files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
 
     Returns
     -------
     dict[str, list[pathlib.Path]]
+        Grouped files.
     """
     logging.warning("This function doesn't work well with multi-thread processing!")
     logging.info("Creating files from deciphered dates.")
@@ -119,16 +129,20 @@ def group_by_deciphered_date(
 def group_by_size(
     files: GeneratorType | list[str | Path], size: int = 10 * GiB
 ) -> list[list[Path]]:
-    """Group files up until a desired size and save it as a grouping within a list.
+    """
+    Group files up until a desired size and save it as a grouping within a list.
 
     Parameters
     ----------
     files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
     size : int
+        The maximum size of the group.
 
     Returns
     -------
     list[list[pathlib.Path]]
+        Grouped files.
     """
     msg = f"Creating groups of files based on size not exceeding: {report_file_size(size)}."
 
@@ -157,16 +171,20 @@ def group_by_size(
 def group_by_subdirectories(
     files: GeneratorType | list[str | Path], within: str | Path | None = None
 ) -> dict[str, list[Path]]:
-    """Group files based on the parent folder that they are located within.
+    """
+    Group files based on the parent folder that they are located within.
 
     Parameters
     ----------
     files : GeneratorType or list of str or pathlib.Path
+        The files to be grouped.
     within : str or pathlib.Path
+        The parent folder to group the files by.
 
     Returns
     -------
     dict[str, list[pathlib.Path]]
+        Grouped files.
     """
     if not within:
         within = Path.cwd()
diff --git a/src/miranda/archive/_selection.py b/src/miranda/archive/_selection.py
index 2c9d1a1e..696e699d 100644
--- a/src/miranda/archive/_selection.py
+++ b/src/miranda/archive/_selection.py
@@ -1,3 +1,4 @@
+"""Select files by the date on which they were last modified."""
 from __future__ import annotations
 
 import logging
@@ -20,22 +21,30 @@ def select_by_date_modified(
     day: int | None,
     *,
     suffixes: str = "nc",
-    date: datetime,
+    date: datetime.date,
 ) -> list[Path]:
-    """Select files by the date on which they were last modified.
+    """
+    Select files by the date on which they were last modified.
 
     Parameters
     ----------
     source : str or Path
+        The directory to search for files.
     year : int
+        The year of the date to select.
     month : int
+        The month of the date to select
     day : int
+        The day of the date to select.
     suffixes : str
-    date : datetime.date
+        The file suffixes to search.
+    date : date
+        The date to select.
 
     Returns
     -------
     list of Path
+        The selected files.
     """
     if date:
         date_selected = date
diff --git a/src/miranda/utils.py b/src/miranda/utils.py
index 15c17054..beab2288 100644
--- a/src/miranda/utils.py
+++ b/src/miranda/utils.py
@@ -39,7 +39,8 @@
 
 
 class HiddenPrints:
-    """Special context manager for hiding print statements.
+    """
+    Special context manager for hiding print statements.
 
     Notes
     -----
@@ -346,8 +347,9 @@ def read_privileges(location: str | Path, strict: bool = False) -> bool:
 
     Parameters
     ----------
-    location: str or Path
-    strict: bool
+    location : str or Path
+
+    strict : bool
 
     Returns
     -------
@@ -373,10 +375,28 @@ def read_privileges(location: str | Path, strict: bool = False) -> bool:
         return False
 
 
-# Function addressing exploit CVE-2007-4559
-def is_within_directory(
+def _is_within_directory(
     directory: str | os.PathLike, target: str | os.PathLike
 ) -> bool:
+    """
+    Check if a target path is within a directory.
+
+    Parameters
+    ----------
+    directory : str or os.PathLike
+        The directory to check.
+    target : str or os.PathLike
+        The target path to check.
+
+    Returns
+    -------
+    bool
+        Whether the target path is within the directory.
+
+    Notes
+    -----
+    Function addressing exploit CVE-2007-4559 for both tar and zip files.
+    """
     abs_directory = Path(directory).resolve()
     abs_target = Path(target).resolve()
 
@@ -384,29 +404,44 @@ def is_within_directory(
     return prefix == abs_directory
 
 
-# Function addressing exploit CVE-2007-4559 for both tar and zip files
 def safe_extract(
     archive: tarfile.TarFile | zipfile.ZipFile,
     path: str = ".",
-    members=None,
+    members: list[str] | None = None,
     *,
-    numeric_owner=False,
+    numeric_owner: bool = False,
 ) -> None:
-    # Handle tarfile extraction
+    """
+    Extract all members from the archive to the current working directory or directory path.
+
+    Parameters
+    ----------
+    archive : TarFile or ZipFile
+        The archive to extract.
+    path : str, optional
+        The path to extract the archive to.
+    members : list of str, optional
+        The members to extract.
+    numeric_owner : bool
+        Whether to extract the archive with numeric owner. Default: False.
+
+    Notes
+    -----
+    Function addressing exploit CVE-2007-4559 for both tar and zip files.
+    """
     if isinstance(archive, tarfile.TarFile):
         for member in archive.getmembers():
             member_path = Path(path).joinpath(member.name)
-            if not is_within_directory(path, member_path):
+            if not _is_within_directory(path, member_path):
                 raise Exception("Attempted Path Traversal in Tar File")
         archive.extractall(  # noqa: S202
             path, members=members, numeric_owner=numeric_owner
         )
 
-    # Handle zipfile extraction
     elif isinstance(archive, zipfile.ZipFile):
         for member in archive.namelist():
             member_path = Path(path).joinpath(member)
-            if not is_within_directory(path, member_path):
+            if not _is_within_directory(path, member_path):
                 raise Exception("Attempted Path Traversal in Zip File")
         archive.extractall(path, members=members)  # noqa: S202
     else:
diff --git a/src/miranda/validators.py b/src/miranda/validators.py
index f6bea948..f14d222f 100644
--- a/src/miranda/validators.py
+++ b/src/miranda/validators.py
@@ -151,11 +151,18 @@
 
 
 def url_validate(target: str) -> typing.Match[str] | None:
-    """Validate whether a supplied URL is reliably written.
+    """
+    Validate whether a supplied URL is reliably written.
 
     Parameters
     ----------
     target : str
+        The URL to validate.
+
+    Returns
+    -------
+    typing.Match[str], optional
+        The match object if the URL is valid.
 
     References
     ----------

From 10cc95964f1ece7e00f4010510459ebbf5dddd7f Mon Sep 17 00:00:00 2001
From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com>
Date: Wed, 25 Sep 2024 16:04:10 -0400
Subject: [PATCH 02/10] exclude underlined modules

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bd760c3c..3557a7a5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -67,7 +67,7 @@ repos:
     rev: v1.8.0
     hooks:
       - id: numpydoc-validation
-        exclude: ^docs/|^tests/|^templates/
+        exclude: ^docs/|^tests/|^templates/|.*\/_.*\.py$
   - repo: https://github.com/Yelp/detect-secrets
     rev: v1.5.0
     hooks:

From 1d411b09c1d472589f201f9a631385c692144108 Mon Sep 17 00:00:00 2001
From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com>
Date: Wed, 25 Sep 2024 17:21:55 -0400
Subject: [PATCH 03/10] remove DataBase code, second pass at documenting
 functions and members

---
 pyproject.toml                    |   3 +
 src/miranda/archive/_selection.py |   1 +
 src/miranda/convert/deh.py        |  52 ++++++++-
 src/miranda/data.py               | 168 ------------------------------
 src/miranda/remote/remove.py      |  66 ++++++------
 src/miranda/storage.py            | 121 +++++++++++++--------
 src/miranda/units.py              |   8 +-
 src/miranda/utils.py              |  55 +++++++---
 8 files changed, 208 insertions(+), 266 deletions(-)
 delete mode 100644 src/miranda/data.py

diff --git a/pyproject.toml b/pyproject.toml
index ca412a0c..e0f5b8c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -277,6 +277,9 @@ checks = [
 exclude = [
   # don't report on objects that match any of these regex
   '\.undocumented_method$',
+  '\.__enter__$',
+  '\.__eq__$',
+  '\.__exit__$',
   '\.__repr__$'
 ]
 override_SS05 = [
diff --git a/src/miranda/archive/_selection.py b/src/miranda/archive/_selection.py
index 696e699d..c8b9594b 100644
--- a/src/miranda/archive/_selection.py
+++ b/src/miranda/archive/_selection.py
@@ -1,4 +1,5 @@
 """Select files by the date on which they were last modified."""
+
 from __future__ import annotations
 
 import logging
diff --git a/src/miranda/convert/deh.py b/src/miranda/convert/deh.py
index 42fdd162..b1a08309 100644
--- a/src/miranda/convert/deh.py
+++ b/src/miranda/convert/deh.py
@@ -19,7 +19,9 @@
 __all__ = ["open_txt"]
 
 # CMOR-like attributes
-cmor = json.load(open(Path(__file__).parent / "data" / "deh_cf_attrs.json"))[  # noqa
+cmor = json.load(
+    Path(__file__).parent.joinpath("data").joinpath("deh_cf_attrs.json").open()
+)[  # noqa
     "variable_entry"
 ]
 
@@ -34,7 +36,19 @@
 
 
 def extract_daily(path: os.PathLike | str) -> tuple[dict, pd.DataFrame]:
-    """Extract data and metadata from DEH (MELCC) stream flow file."""
+    """
+    Extract data and metadata from DEH (MELCCFP) stream flow file.
+
+    Parameters
+    ----------
+    path : os.PathLike or str
+        The path to the file.
+
+    Returns
+    -------
+    tuple[dict, pd.DataFrame]
+        The metadata and the data.
+    """
     with Path(path).open("r", encoding="latin1") as fh:
         txt = fh.read()
         txt = re.sub(" +", " ", txt)
@@ -70,7 +84,23 @@ def extract_daily(path: os.PathLike | str) -> tuple[dict, pd.DataFrame]:
 
 
 def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict) -> xr.Dataset:
-    """Return CF-compliant metadata."""
+    """
+    Return CF-compliant metadata.
+
+    Parameters
+    ----------
+    meta : dict
+        The metadata dictionary.
+    data : pd.DataFrame
+        The data DataFrame.
+    cf_table : dict
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.Dataset
+        The CF-compliant dataset.
+    """
     ds = xr.Dataset()
 
     ds["q"] = xr.DataArray(data["Débit"], attrs=cf_table["q"])
@@ -122,6 +152,20 @@ def parse_dms(coord):
 
 
 def open_txt(path: str | Path, cf_table: dict | None = cmor) -> xr.Dataset:
-    """Extract daily HQ meteorological data and convert to xr.DataArray with CF-Convention attributes."""
+    """
+    Extract daily HQ meteorological data and convert to xr.DataArray with CF-Convention attributes.
+
+    Parameters
+    ----------
+    path : str or Path
+        The path to the file.
+    cf_table : dict, optional
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.Dataset
+        The CF-compliant dataset.
+    """
     meta, data = extract_daily(path)
     return to_cf(meta, data, cf_table)
diff --git a/src/miranda/data.py b/src/miranda/data.py
deleted file mode 100644
index db20e0ee..00000000
--- a/src/miranda/data.py
+++ /dev/null
@@ -1,168 +0,0 @@
-"""Database Management module."""
-
-from __future__ import annotations
-
-import logging.config
-import os
-from pathlib import Path
-from types import GeneratorType
-from typing import Optional
-
-from .io import find_filepaths
-from .scripting import LOGGING_CONFIG
-from .units import GiB
-from .validators import url_validate
-
-logging.config.dictConfig(LOGGING_CONFIG)
-
-__all__ = ["DataBase"]
-
-
-class DataBase:
-    """Database management class."""
-
-    def __init__(
-        self,
-        source,
-        *,
-        destination: Path | str | None = None,
-        common_path: Path | str | None = None,
-        file_pattern: str | list[str] = "*.nc",
-        project_name: str | None = None,
-        recursive: bool = True,
-    ):
-        self._source = Path(source)
-
-        if destination is not None:
-            self._destination = Path(destination)
-        else:
-            self._destination = Path().cwd()
-
-        self.project_name = str(project_name)
-        if not self.project_name:
-            self.project_name = self._destination.stem
-
-        if not file_pattern:
-            self.file_suffixes = ["*"]
-
-        elif isinstance(file_pattern, str):
-            self.file_suffixes = [file_pattern]
-        elif isinstance(file_pattern, (GeneratorType, list)):
-            self.file_suffixes = file_pattern
-
-        if not recursive:
-            self.recursive = False
-        else:
-            self.recursive = True
-
-        # if common_path is None:
-        #     self._common_path = Path(source)
-
-        self._files = self._scrape(source)
-        self._is_server = False
-
-        self.successful_transfers = 0
-
-    def __repr__(self):
-        """Repl function."""
-        return f"<{self.__class__.__module__}.{self.__class__.__name__} object at {hex(id(self))}>"
-
-    def __str__(self):
-        """String function."""
-        prepr = "[{}]".format(
-            ", ".join([f'{k}: "{v}"' for k, v in self.__dict__.items()])
-        )
-        return f"{self.__class__.__name__}({prepr})"
-
-    def __getitem__(self, key):
-        """Getter."""
-        return self.__dict__[key]
-
-    def __setitem__(self, key, value):
-        """Setter."""
-        self.__dict__[key] = value
-
-    def __delitem__(self, key):
-        """Delete item."""
-        del self.__dict__[key]
-
-    def __contains__(self, key):
-        """Contains function."""
-        return key in self.__dict__
-
-    def __len__(self):
-        """Length."""
-        return len(self._files)
-
-    def _scrape(self, source) -> list[Path]:
-        if source is None:
-            raise ValueError("No source provided.")
-        if isinstance(source, (GeneratorType, list, tuple, str, Path)):
-            files = find_filepaths(source, **self._as_dict())
-            common_path = os.path.commonpath(files)
-            self._files = files
-            self._common_path = common_path
-            return files
-        raise ValueError("Source must be an iterable of strings or Paths.")
-
-    def _as_dict(self):
-        return {
-            key: value
-            for key, value in self.__dict__.items()
-            if not key.startswith("_") and not callable(key)
-        }
-
-    def items(self):
-        """Show items."""
-        return self._as_dict().items()
-
-    def keys(self):
-        """Show keys."""
-        return self._as_dict().keys()
-
-    def values(self):
-        """Show values."""
-        return self._as_dict().values()
-
-    def group_by(
-        self,
-        *,
-        common_path: Path | str | None = None,
-        subdirectories: bool = True,
-        dates: bool = True,
-        size: int = 10 * GiB,
-    ):
-        """Grouping meta-function.
-
-        Notes
-        -----
-        Not yet implemented.
-
-        """
-        # use_grouping = True
-        #
-        # if subdirectories:
-        #     file_groups = group_by_subdirectories(self._files, within=common_path)
-        #
-        # else:
-        #     file_groups = defaultdict(lambda: list())
-        #     for f in self._files:
-        #         file_groups["."].append(f)
-        pass
-
-    def target(self, target: Path | str):
-        """Target directory or server address."""
-        self._destination = target
-        self._is_server = self._url_validate(target=target)
-
-    @staticmethod
-    def _url_validate(target):
-        return url_validate(target=target)
-
-    def archive(self):
-        """Not yet implemented."""
-        pass
-
-    def transfer(self):
-        """Not yet implemented."""
-        pass
diff --git a/src/miranda/remote/remove.py b/src/miranda/remote/remove.py
index ab595905..8e7e200e 100644
--- a/src/miranda/remote/remove.py
+++ b/src/miranda/remote/remove.py
@@ -8,7 +8,6 @@
 from getpass import getpass
 from pathlib import Path
 from types import GeneratorType
-from typing import Optional
 
 from miranda.io.utils import creation_date
 from miranda.scripting import LOGGING_CONFIG
@@ -34,16 +33,13 @@
 
 
 def file_emptier(*, file_list: list[str | Path] | GeneratorType) -> None:
-    """Open and overwrite a list of file paths in order to delete data while preserving the file name.
+    """
+    Open and overwrite a list of file paths in order to delete data while preserving the file name.
 
     Parameters
     ----------
     file_list : list of str or Path, or GeneratorType
         List of files to be overwritten
-
-    Returns
-    -------
-    None
     """
     file_list = sorted([Path(f) for f in file_list])
 
@@ -56,6 +52,7 @@ def file_emptier(*, file_list: list[str | Path] | GeneratorType) -> None:
         Path(file).open("w").close()
 
 
+# FIXME: This function is terribly insecure. It should be refactored to use a more secure method of authentication.
 def delete_by_date(
     *,
     source: str | Path,
@@ -68,23 +65,20 @@ def delete_by_date(
     password: str | None = None,
     date_object: date | None = None,
 ) -> None:
-    """Remove a selection of files based on a given date of last modification.
+    """
+    Remove a selection of files based on a given date of last modification.
 
     Parameters
     ----------
-    source: str or Path
-    year: int, optional
-    month: int, optional
-    day: int, optional
-    pattern: str, optional
-    server: str or Path, optional
-    user: str, optional
-    password: str, optional
-    date_object: date, optional
-
-    Returns
-    -------
-    None
+    source : str or Path
+    year : int, optional
+    month : int, optional
+    day : int, optional
+    pattern : str, optional
+    server : str or Path, optional
+    user : str, optional
+    password : str, optional
+    date_object : date, optional
     """
     user = user or input("Username:")
     password = password or getpass("Password:")
@@ -134,6 +128,7 @@ def delete_by_date(
     return
 
 
+# FIXME: This function is terribly insecure. It should be refactored to use a more secure method of authentication.
 def delete_duplicates(
     *,
     source: str | Path,
@@ -144,21 +139,25 @@ def delete_duplicates(
     pattern: str | None = None,
     delete_target_duplicates: bool = False,
 ) -> None:
-    """Delete duplicate files.
+    """
+    Delete duplicate files.
 
     Parameters
     ----------
     source : str or Path
+        The source directory to compare against.
     target : str or Path
+        The target directory to compare against.
     server : str or Path, optional
+        The server address.
     user: str
+        The username.
     password : str
+        The password.
     pattern: str
+        The file pattern to search for.
     delete_target_duplicates : bool
-
-    Returns
-    -------
-    None
+        Whether to delete the duplicates in the target directory.
     """
     user = user or input("Username:")
     password = password or getpass("Password:")
@@ -198,6 +197,7 @@ def delete_duplicates(
     return
 
 
+# FIXME: This function is terribly insecure. It should be refactored to use a more secure method of authentication.
 def delete_by_variable(
     *,
     target: str | Path | list[str | Path] | GeneratorType | None = None,
@@ -208,24 +208,28 @@ def delete_by_variable(
     file_suffix: str | None = None,
     delete: bool = False,
 ) -> None:
-    """Delete according to variable name.
+    """
+    Delete according to variable name.
 
     Given target location(s), a list of variables and a server address, perform a glob search
     and delete file names starting with the variables identified
 
     Parameters
     ----------
-    target : str, Path, list of str or Path, or GeneratorType]
+    target : str, Path, list of str or Path, or GeneratorType
+        The target location(s).
     variables : list of str
+        The variables to search for.
     server : str or Path, optional
+        The server address.
     user : str, optional
+        The username.
     password : str, optional
+        The password.
     file_suffix : str, optional
+        The file suffix to search for.
     delete : bool
-
-    Returns
-    -------
-    None
+        Whether to delete the files
     """
     user = user or input("Username:")
     password = password or getpass("Password:")
diff --git a/src/miranda/storage.py b/src/miranda/storage.py
index 525c428c..bfe4d399 100644
--- a/src/miranda/storage.py
+++ b/src/miranda/storage.py
@@ -1,19 +1,14 @@
 """
-=====================
-Disk space management
-=====================
+Disk space management.
 
 Classes:
-
  * DiskSpaceError - the exception raised on failure.
  * :py:class:`FileMeta` - file and its size.
  * :py:class:`StorageState` - storage capacity and availability of a medium.
 
 Functions:
-
  * :py:func:`total_size` - get total size of a list of files.
  * :py:func:`size_division` - divide files based on number and size restrictions.
-
 """
 
 from __future__ import annotations
@@ -22,7 +17,6 @@
 import logging.config
 import subprocess  # noqa: S404
 from functools import reduce
-from multiprocessing.managers import Value
 from pathlib import Path
 from types import GeneratorType
 
@@ -49,66 +43,80 @@ class DiskSpaceError(Exception):
 
 
 class FileMeta:
-    """File path and size."""
+    """
+    File path and size.
 
-    django = {
-        "path": ["CharField", "max_length=512"],
-        "size": ["IntegerField", "null=True", "blank=True"],
-    }
+    Parameters
+    ----------
+    path : str
+        The full path of the file.
+    size : int
+        The size of file in bytes.
+    """
 
     def __init__(self, path: str, size: int = -1):
-        """Initialize file meta.
+        """
+        Initialize file meta.
 
         Parameters
         ----------
         path : str
-            full path of the file.
+            The full path of the file.
         size : int
-            size of file in bytes (default: will obtain from os.path.getsize
-            if file exists, set to 0 otherwise).
-
+            The size of file in bytes.
+            Will obtain from os.path.getsize if file exists, set to 0 otherwise.
         """
         # Make sure we have the full path of the file
-        self.path = Path(path).absolute()
+        self._path = Path(path).absolute()
 
         # Get size of file if it is not specified
-        if (-1 == size) and self.path.exists():
+        if (-1 == size) and self._path.exists():
             try:
-                self.size = self.path.stat().st_size
+                self.size = self._path.stat().st_size
             except OSError:
-                raise DiskSpaceError(f"Cannot get size of {self.path.name}.")
+                raise DiskSpaceError(f"Cannot get size of {self._path.name}.")
         elif -1 == size:
             self.size = 0
         else:
             self.size = size
 
     def __eq__(self, other):  # noqa: D105
-        if self.path == other.path:
+        if self._path == other._path:  # noqa
             return True
         else:
             return False
 
 
 class StorageState:
-    """Information regarding the storage capacity of a disk."""
+    """
+    Information regarding the storage capacity of a disk.
+
+    Parameters
+    ----------
+    base_path : Path
+        The base path of the storage medium.
+    capacity : int
+        Capacity of medium in bytes.
+    used_space : int
+        Space currently used on the medium.
+    free_space : int
+        Space available on the medium.
+    """
 
     def __init__(self, base_path, capacity=-1, used_space=-1, free_space=-1):
-        """Initialize storage state.
+        """
+        Initialize storage state.
 
         Parameters
         ----------
         base_path : str
-            base path of the storage medium.
+            The base path of the storage medium.
         capacity : int
-            capacity of medium in bytes (default: will obtain from system
-            call to 'df').
+            Capacity of medium in bytes (default: will obtain from system call to 'df').
         used_space : int
-            space currently used on the medium (default: will obtain from
-            system call to 'df').
+            Space currently used on the medium (default: will obtain from system call to 'df').
         free_space : int
-            space available on the medium (default: will obtain from system
-            call to 'df').
-
+            Space available on the medium (default: will obtain from system call to 'df').
         """
         # Make sure we have the full base path
         if len(base_path) > 1:
@@ -158,17 +166,18 @@ def __init__(self, base_path, capacity=-1, used_space=-1, free_space=-1):
 
 
 def size_evaluation(file_list: list[str | FileMeta | Path]) -> int:
-    """Total size of files.
+    """
+    Total size of files.
 
     Parameters
     ----------
     file_list : list of str or Path or FileMeta
+        List of files to evaluate.
 
     Returns
     -------
     int
-      total size of files in bytes.
-
+        The total size of files in bytes.
     """
     if file_list:
         size = 0
@@ -192,11 +201,12 @@ def size_division(
     check_name_repetition: bool = False,
     preserve_order: bool = False,
 ) -> list[list]:
-    """Divide files according to size and number limits.
+    """
+    Divide files according to size and number limits.
 
     Parameters
     ----------
-    files_to_divide : list of str or Path, FileMeta, Path
+    files_to_divide : list of str or Path or FileMeta
         Files to be sorted.
     size_limit : int
         Size limit of divisions in bytes. Default: 0 (no limit).
@@ -210,7 +220,7 @@ def size_division(
     Returns
     -------
     list[list]
-        list of divisions (each division is a list of FileMeta objects).
+        The list of divisions (each division is a list of FileMeta objects).
     """
     divisions = list()
     for file_divide in files_to_divide:
@@ -227,7 +237,7 @@ def size_division(
             flag_skip = 0
             for file_divided in division:
                 if check_name_repetition and (
-                    Path(file_divided.path).name == Path(file_divide.path).name
+                    Path(file_divided._path).name == Path(file_divide._path).name
                 ):
                     flag_skip = 1
                 size = size + file_divided.size
@@ -258,15 +268,18 @@ def file_size(
         | dict[str, Path | list[Path]]
     )
 ) -> int:
-    """Return size of object in bytes.
+    """
+    Return size of object in bytes.
 
     Parameters
     ----------
     file_path_or_bytes_or_dict : Path or str or int, list of str or Path, GeneratorType, or dict[str, Path or list of Path]
+        The file or object to be evaluated.
 
     Returns
     -------
     int
+        The size of the file or object in bytes.
     """
     try:
         if isinstance(file_path_or_bytes_or_dict, int):
@@ -322,7 +335,8 @@ def report_file_size(
     use_binary: bool = True,
     significant_digits: int = 2,
 ) -> str:
-    """Report file size in a human-readable format.
+    """
+    Report file size in a human-readable format.
 
     This function will parse the contents of a list or generator of files and return the
     size in bytes of a file or a list of files in pretty formatted text.
@@ -330,14 +344,37 @@ def report_file_size(
     Parameters
     ----------
     file_path_or_bytes_or_dict : Path or str or int, list of str or Path, GeneratorType, or dict[str, Path or list of Path]
+        The file or object to be evaluated.
     use_binary : bool
+        Flag to use binary conversion (default: True).
     significant_digits : int
+        Number of significant digits to display (default: 2).
 
+    Returns
+    -------
+    str
+        The file size in a human-readable format.
     """
     conversions = ["B", "k{}B", "M{}B", "G{}B", "T{}B", "P{}B", "E{}B", "Z{}B", "Y{}B"]
 
     def _size_formatter(i: int, binary: bool = True, precision: int = 2) -> str:
-        """Format byte size into an appropriate nomenclature for prettier printing."""
+        """
+        Format byte size into an appropriate nomenclature for prettier printing.
+
+        Parameters
+        ----------
+        i : int
+            The size in bytes.
+        binary : bool
+            Flag to use binary conversion (default: True).
+        precision : int
+            Number of significant digits to display (default: 2).
+
+        Returns
+        -------
+        str
+            The formatted byte size.
+        """
         import math
 
         base = 1024 if binary else 1000
diff --git a/src/miranda/units.py b/src/miranda/units.py
index 5b86dddb..75992685 100644
--- a/src/miranda/units.py
+++ b/src/miranda/units.py
@@ -19,7 +19,8 @@ def get_time_frequency(
     expected_period: str | None = None,
     minimum_continuous_period: str = "1M",
 ) -> tuple[list[int | str], str]:
-    """Try to understand the Dataset frequency.
+    """
+    Try to understand the Dataset frequency.
 
     If it can't be inferred with :py:func:`xarray.infer_freq` it tries to:
     - look for a "freq" attrs in the global or time variable attributes.
@@ -45,10 +46,9 @@ def get_time_frequency(
     Returns
     -------
     offset : list of int or str
-        The offset a list of (multiplier, base)
+        The offset a list of (multiplier, base).
     offset_meaning : str
-        The offset meaning (single word)
-
+        The offset meaning (single word).
     """
     if expected_period is not None:
         if not [expected_period.endswith(end) for end in ["H", "D", "M", "A"]]:
diff --git a/src/miranda/utils.py b/src/miranda/utils.py
index beab2288..79dc3528 100644
--- a/src/miranda/utils.py
+++ b/src/miranda/utils.py
@@ -59,7 +59,20 @@ def __exit__(self, exc_type, exc_val, exc_tb):  # noqa: D105
 
 
 def chunk_iterables(iterable: Sequence, chunk_size: int) -> Iterable:
-    """Generate lists of `chunk_size` elements from `iterable`.
+    """
+    Generate lists of `chunk_size` elements from `iterable`.
+
+    Parameters
+    ----------
+    iterable : Sequence
+        The iterable to chunk.
+    chunk_size : int
+        The size of the chunks.
+
+    Yields
+    ------
+    Iterable
+        The chunked iterable.
 
     Notes
     -----
@@ -78,9 +91,11 @@ def chunk_iterables(iterable: Sequence, chunk_size: int) -> Iterable:
             break
 
 
+# FIXME: The following function could probably be replaced or at least placed closer to its usages.
 @contextmanager
 def working_directory(directory: str | Path) -> None:
-    """Change the working directory within a context object.
+    """
+    Change the working directory within a context object.
 
     This function momentarily changes the working directory within the context and reverts to the file working directory
     when the code block it is acting upon exits
@@ -88,10 +103,7 @@ def working_directory(directory: str | Path) -> None:
     Parameters
     ----------
     directory : str or pathlib.Path
-
-    Returns
-    -------
-    None
+        The directory to temporarily change to.
     """
     owd = os.getcwd()  # noqa: PTH109
 
@@ -105,19 +117,22 @@ def working_directory(directory: str | Path) -> None:
         os.chdir(owd)
 
 
+# FIXME: The following function could probably be replaced or at least placed closer to its usages.
 def single_item_list(iterable: Iterable) -> bool:
-    """Ascertain whether a list has exactly one entry.
+    """
+    Ascertain whether a list has exactly one entry.
 
     See: https://stackoverflow.com/a/16801605/7322852
 
     Parameters
     ----------
     iterable : Iterable
+        The list to check.
 
     Returns
     -------
     bool
-
+        Whether the list is a single item.
     """
     iterator = iter(iterable)
     has_true = any(iterator)  # consume from "i" until first true or it's exhausted
@@ -131,19 +146,20 @@ def generic_extract_archive(
     resources: str | Path | list[bytes | str | Path],
     output_dir: str | Path | None = None,
 ) -> list[Path]:
-    """Extract archives (tar/zip) to a working directory.
+    """
+    Extract archives (tar/zip) to a working directory.
 
     Parameters
     ----------
     resources : str or Path or list of bytes or str or Path
-        list of archive files (if netCDF files are in list, they are passed and returned as well in the return).
+        List of archive files (if netCDF files are in list, they are passed and returned as well in the return).
     output_dir : str or Path, optional
-        string or Path to a working location (default: temporary folder).
+        String or Path to a working location (default: temporary folder).
 
     Returns
     -------
     list
-        List of original or of extracted files
+        The list of original or of extracted files.
     """
     archive_types = [".gz", ".tar", ".zip", ".7z"]
     output_dir = output_dir or tempfile.gettempdir()
@@ -206,7 +222,8 @@ def generic_extract_archive(
 def list_paths_with_elements(
     base_paths: str | list[str] | os.PathLike[str], elements: list[str]
 ) -> list[dict]:
-    """List a given path structure.
+    """
+    List a given path structure.
 
     Parameters
     ----------
@@ -269,7 +286,8 @@ def list_paths_with_elements(
 def publish_release_notes(
     style: str = "md", file: os.PathLike | StringIO | TextIO | None = None
 ) -> str | None:
-    """Format release history in Markdown or ReStructuredText.
+    """
+    Format release history in Markdown or ReStructuredText.
 
     Parameters
     ----------
@@ -282,6 +300,7 @@ def publish_release_notes(
     Returns
     -------
     str, optional
+        The formatted string if a file object is not provided.
 
     Notes
     -----
@@ -343,18 +362,20 @@ def publish_release_notes(
 
 
 def read_privileges(location: str | Path, strict: bool = False) -> bool:
-    """Determine whether a user has read privileges to a specific file.
+    """
+    Determine whether a user has read privileges to a specific file.
 
     Parameters
     ----------
     location : str or Path
-
+        The location to be assessed.
     strict : bool
+        Whether to raise an exception if the user does not have read privileges. Default: False.
 
     Returns
     -------
     bool
-        Whether the current user shell has read privileges
+        Whether the current user shell has read privileges.
     """
     msg = ""
     try:

From 0a025f774cfd1942742b1b1e560b13e048e584f1 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Thu, 26 Sep 2024 13:48:09 -0400
Subject: [PATCH 04/10] docstring adjustments

---
 src/miranda/__init__.py          |   1 -
 src/miranda/convert/deh.py       |  19 ++++-
 src/miranda/convert/eccc.py      |  41 ++++++++-
 src/miranda/convert/eccc_rdrs.py |  69 ++++++++++-----
 src/miranda/convert/ecmwf.py     |  20 +++--
 src/miranda/convert/hq.py        |  86 ++++++++++++++++---
 src/miranda/convert/melcc.py     | 140 ++++++++++++++++++++++---------
 src/miranda/convert/utils.py     |  48 +++++++++--
 8 files changed, 335 insertions(+), 89 deletions(-)

diff --git a/src/miranda/__init__.py b/src/miranda/__init__.py
index 98e182c3..d08c7d8e 100644
--- a/src/miranda/__init__.py
+++ b/src/miranda/__init__.py
@@ -37,5 +37,4 @@
     utils,
     validators,
 )
-from .data import DataBase
 from .storage import FileMeta, StorageState
diff --git a/src/miranda/convert/deh.py b/src/miranda/convert/deh.py
index b1a08309..3fa5acd1 100644
--- a/src/miranda/convert/deh.py
+++ b/src/miranda/convert/deh.py
@@ -114,7 +114,20 @@ def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict) -> xr.Dataset:
         attrs={"long_name": "drainage area", "units": "km2"},
     )
 
-    def parse_dms(coord):
+    def _parse_dms(coord: str) -> float:
+        """
+        Parse dimensions.
+
+        Parameters
+        ----------
+        coord : str
+            The coordinate string.
+
+        Returns
+        -------
+        float
+            The parsed coordinate.
+        """
         deg, minutes, seconds, _ = re.split("[°'\"]", coord)
         if float(deg) > 0:
             return round(
@@ -124,7 +137,7 @@ def parse_dms(coord):
 
     coords = meta["coords"].split(" // ")
     ds["lat"] = xr.DataArray(
-        parse_dms(coords[0]),
+        _parse_dms(coords[0]),
         attrs={
             "standard_name": "latitude",
             "long_name": "latitude",
@@ -132,7 +145,7 @@ def parse_dms(coord):
         },
     )
     ds["lon"] = xr.DataArray(
-        parse_dms(coords[1]),
+        _parse_dms(coords[1]),
         attrs={
             "standard_name": "longitude",
             "long_name": "longitude",
diff --git a/src/miranda/convert/eccc.py b/src/miranda/convert/eccc.py
index 90b3296a..becf0509 100644
--- a/src/miranda/convert/eccc.py
+++ b/src/miranda/convert/eccc.py
@@ -14,7 +14,16 @@
 
 
 def convert_canswe(file: str | Path, output: str | Path):
-    """Convert the CanSWE netCDF files to production-ready CF-compliant netCDFs."""
+    """
+    Convert the CanSWE netCDF files to production-ready CF-compliant netCDFs.
+
+    Parameters
+    ----------
+    file : str or Path
+        The path to the CanSWE netCDF file.
+    output : str or Path
+        The output directory.
+    """
     ds = xr.open_dataset(file)
     ds = ds.set_coords(
         [
@@ -31,14 +40,40 @@ def convert_canswe(file: str | Path, output: str | Path):
         ]
     )
 
-    def clean_flags(variable):
+    def clean_flags(variable: xr.DataArray) -> dict:
+        """
+        Clean the flags.
+
+        Parameters
+        ----------
+        variable : xr.DataArray
+            The flag variable.
+
+        Returns
+        -------
+        dict
+            The cleaned flags.
+        """
         values = list(map(bytes.decode, np.sort(pd.unique(variable.values.flatten()))))
         values[0] = "n"
         mean_dict = parse_desc(variable.description)
         meanings = " ".join(np.array([mean_dict[v] for v in values]))
         return dict(flag_values=values, flag_meanings=meanings)
 
-    def parse_desc(desc):
+    def parse_desc(desc: str) -> dict:
+        """
+        Parse the description attribute.
+
+        Parameters
+        ----------
+        desc : str
+            The description attribute.
+
+        Returns
+        -------
+        dict
+            The parsed description.
+        """
         d = dict(
             map(
                 lambda kv: (kv[0].strip(), "_".join(kv[1].replace(">", "").split())),
diff --git a/src/miranda/convert/eccc_rdrs.py b/src/miranda/convert/eccc_rdrs.py
index afc34f84..6542b228 100644
--- a/src/miranda/convert/eccc_rdrs.py
+++ b/src/miranda/convert/eccc_rdrs.py
@@ -5,6 +5,7 @@
 import logging.config
 import os
 from pathlib import Path
+from typing import Any
 
 import xarray as xr
 from numpy import unique
@@ -26,37 +27,57 @@
 # FIXME: Can we use `name_output_file` instead? We already have a better version of this function.
 
 
-def _get_drop_vars(file: str | os.PathLike, *, keep_vars: list[str] | set[str]):
+def _get_drop_vars(file: str | os.PathLike[str], *, keep_vars: list[str] | set[str]):
+    """
+    Determine dropped variables.
+
+    Parameters
+    ----------
+    file : str or os.PathLike
+        The file to check.
+    keep_vars : list or set of str
+        The variables to keep.
+
+    Returns
+    -------
+    list
+        The dropped variables.
+    """
     drop_vars = list(xr.open_dataset(file).data_vars)
     return list(set(drop_vars) - set(keep_vars))
 
 
 def convert_rdrs(
     project: str,
-    input_folder: str | os.PathLike,
-    output_folder: str | os.PathLike,
+    input_folder: str | os.PathLike[str],
+    output_folder: str | os.PathLike[str],
     output_format: str = "zarr",
-    working_folder: str | os.PathLike | None = None,
+    working_folder: str | os.PathLike[str] | None = None,
     overwrite: bool = False,
     cfvariable_list: list | None = None,
-    **dask_kwargs,
+    **dask_kwargs: dict[str, Any],
 ) -> None:
-    r"""Convert RDRS dataset.
+    r"""
+    Convert RDRS dataset.
 
     Parameters
     ----------
     project : str
+        The project name.
     input_folder : str or os.PathLike
+        The input folder.
     output_folder : str or os.PathLike
+        The output folder.
     output_format : {"netcdf", "zarr"}
+        The output format.
     working_folder : str or os.PathLike, optional
+        The working folder.
     overwrite : bool
+        Whether to overwrite existing files. Default: False.
     cfvariable_list : list, optional
-    \*\*dask_kwargs
-
-    Returns
-    -------
-    None
+        The CF variable list.
+    \*\*dask_kwargs : dict
+        Additional keyword arguments passed to the Dask scheduler.
     """
     # TODO: This setup configuration is near-universally portable. Should we consider applying it to all conversions?
     var_attrs = load_json_data_mappings(project=project)["variables"]
@@ -76,7 +97,6 @@ def convert_rdrs(
         working_folder = Path(working_folder).expanduser()
 
     # FIXME: Do we want to collect everything? Maybe return a dictionary with years and associated files?
-
     out_freq = None
     gathered = gather_raw_rdrs_by_years(input_folder)
     for year, ncfiles in gathered[project].items():
@@ -139,26 +159,36 @@ def rdrs_to_daily(
     year_start: int | None = None,
     year_end: int | None = None,
     process_variables: list[str] | None = None,
-    **dask_kwargs,
+    **dask_kwargs: dict[str, Any],
 ) -> None:
-    r"""Write out RDRS files to daily-timestep files.
+    r"""
+    Write out RDRS files to daily-timestep files.
 
     Parameters
     ----------
     project : str
+        The project name.
     input_folder : str or os.PathLike
+        The input folder.
     output_folder : str or os.PathLike
+        The output folder.
     working_folder : str or os.PathLike
+        The working folder.
     overwrite : bool
+        Whether to overwrite existing files. Default: False.
     output_format : {"netcdf", "zarr"}
+        The output format.
     year_start : int, optional
+        The start year.
+        If not provided, the minimum year in the dataset will be used.
     year_end : int, optional
+        The end year.
+        If not provided, the maximum year in the dataset will be used.
     process_variables : list of str, optional
-    \*\*dask_kwargs
-
-    Returns
-    -------
-    None
+        The variables to process.
+        If not provided, all variables will be processed.
+    \*\*dask_kwargs : dict
+        Additional keyword arguments passed to the Dask scheduler.
     """
     if isinstance(input_folder, str):
         input_folder = Path(input_folder).expanduser()
@@ -187,6 +217,7 @@ def rdrs_to_daily(
             out_variables = aggregate(
                 xr.open_mfdataset(infiles, engine="zarr"), freq="day"
             )
+            # FIXME: Fetch chunk config has been modified to accept different arguments.
             chunks = fetch_chunk_config(project=project, freq="day")
             chunks["time"] = len(out_variables[list(out_variables.keys())[0]].time)
             write_dataset_dict(
diff --git a/src/miranda/convert/ecmwf.py b/src/miranda/convert/ecmwf.py
index 71ecc839..26346055 100644
--- a/src/miranda/convert/ecmwf.py
+++ b/src/miranda/convert/ecmwf.py
@@ -20,26 +20,34 @@
 __all__ = ["tigge_convert"]
 
 
+# FIXME: Is this function still pertinent?
 def tigge_convert(
     source: os.PathLike | None = None,
     target: os.PathLike | None = None,
     processes: int = 8,
 ) -> None:
-    """Convert grib2 file to netCDF format.
+    """
+    Convert TIGGE grib2 file to netCDF format.
 
     Parameters
     ----------
     source : os.PathLike, optional
+        The source directory containing the TIGGE files.
     target : os.PathLike, optional
+        The target directory to save the converted files.
     processes : int
-
-    Returns
-    -------
-    None
+        The number of processes to use for the conversion.
     """
 
     def _tigge_convert(fn):
-        """Launch reformatting function."""
+        """
+        Launch reformatting function.
+
+        Parameters
+        ----------
+        fn : tuple
+            The file and output folder.
+        """
         infile, output_folder = fn
         try:
             for f in Path(infile.parent).glob(infile.name.replace(".grib", "*.idx")):
diff --git a/src/miranda/convert/hq.py b/src/miranda/convert/hq.py
index 4f97f146..c33aa2a5 100644
--- a/src/miranda/convert/hq.py
+++ b/src/miranda/convert/hq.py
@@ -6,8 +6,10 @@
 import datetime as dt
 import json
 import logging.config
+import os
 import re
 from pathlib import Path
+from typing import Any
 
 import numpy as np
 import pandas as pd
@@ -68,8 +70,24 @@
 }
 
 
-def guess_variable(meta, cf_table: dict | None) -> tuple[str, str | None]:
-    """Return the corresponding CMOR variable."""
+def guess_variable(
+    meta: dict[str, Any], cf_table: dict | None
+) -> tuple[str, str | None]:
+    """
+    Return the corresponding CMOR variable.
+
+    Parameters
+    ----------
+    meta : dict
+        The metadata.
+    cf_table : dict, optional
+        The CMOR table.
+
+    Returns
+    -------
+    tuple
+        The variable name and the table name.
+    """
     if cf_table is None:
         cf_table = cmor
 
@@ -112,15 +130,27 @@ def guess_variable(meta, cf_table: dict | None) -> tuple[str, str | None]:
 cf_attrs_names = {"x": "lon", "y": "lat", "z": "elevation", "nom": "site"}
 
 
-def extract_daily(path) -> tuple[dict, pd.DataFrame]:
-    """Extract data and metadata from HQ meteo file."""
+def extract_daily(path: str | os.PathLike[str]) -> tuple[dict, pd.DataFrame]:
+    """
+    Extract data and metadata from HQ météo file.
+
+    Parameters
+    ----------
+    path : os.PathLike or str
+        The path to the file.
+
+    Returns
+    -------
+    tuple
+        The metadata and the data.
+    """
     with Path(path).open("r", encoding="latin1") as fh:
         txt = fh.read()
         meta, data = re.split(data_header_pattern, txt, maxsplit=2)
 
     sections = iter(re.split(section_patterns, meta)[1:])
 
-    m = dict()
+    m = {}
     for sec in sections:
         if sec in meta_patterns:
             content = next(sections)
@@ -149,13 +179,31 @@ def extract_daily(path) -> tuple[dict, pd.DataFrame]:
     return m, d
 
 
-def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict | None = None) -> xr.DataArray:
-    """Return CF-compliant metadata."""
+def to_cf(
+    meta: dict[str, Any], data: pd.DataFrame, cf_table: dict[str, Any] | None = None
+) -> xr.DataArray:
+    """
+    Return CF-compliant metadata.
+
+    Parameters
+    ----------
+    meta : dict
+        The metadata dictionary.
+    data : pd.DataFrame
+        The data DataFrame.
+    cf_table : dict, optional
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.DataArray
+        The CF-compliant xarray DataArray.
+    """
     if cf_table is None:
-        cf_table = dict()
+        cf_table = {}
 
     # Convert meta values
-    m = dict()
+    m = {}
     for key, val in meta.items():
         m[key] = converters.get(key, lambda q: q)(val)
 
@@ -182,7 +230,23 @@ def to_cf(meta: dict, data: pd.DataFrame, cf_table: dict | None = None) -> xr.Da
     return cf_corrected
 
 
-def open_csv(path: str | Path, cf_table: dict | None = cmor) -> xr.DataArray:
-    """Extract daily HQ meteo data and convert to xr.DataArray with CF-Convention attributes."""
+def open_csv(
+    path: str | os.PathLike[str], cf_table: dict[str, Any] | None = cmor
+) -> xr.DataArray:
+    """
+    Extract daily HQ meteo data and convert to xr.DataArray with CF-Convention attributes.
+
+    Parameters
+    ----------
+    path : os.PathLike or str
+        The path to the file.
+    cf_table : dict, optional
+        The CF table dictionary.
+
+    Returns
+    -------
+    xr.DataArray
+        The CF-compliant xarray DataArray.
+    """
     meta, data = extract_daily(path)
     return to_cf(meta, data, cf_table)
diff --git a/src/miranda/convert/melcc.py b/src/miranda/convert/melcc.py
index f1ed0339..3cadecd4 100644
--- a/src/miranda/convert/melcc.py
+++ b/src/miranda/convert/melcc.py
@@ -59,15 +59,18 @@
 
 
 def parse_var_code(vcode: str) -> dict[str, Any]:
-    """Parse variable code to generate metadata
+    """
+    Parse variable code to generate metadata.
 
     Parameters
     ----------
-    vcode: str
+    vcode : str
+        The variable code.
 
     Returns
     -------
     dict[str, Any]
+        The metadata dictionary.
     """
     match = re.match(r"(\D*)(\d*)([abcfhqz])", vcode)
     if match is None:
@@ -85,8 +88,20 @@ def parse_var_code(vcode: str) -> dict[str, Any]:
     }
 
 
-def _validate_db_file(db_file) -> list[str]:
-    """Validate the database file and ensure that input is trustworthy."""
+def _validate_db_file(db_file: str) -> str:
+    """
+    Validate the database file and ensure that input is trustworthy.
+
+    Parameters
+    ----------
+    db_file : str
+        The database file.
+
+    Returns
+    -------
+    str
+        The database file.
+    """
     if len(db_file) > 1:
         raise ValueError("Only one database file can be processed at a time.")
     if not Path(db_file).is_file():
@@ -94,8 +109,20 @@ def _validate_db_file(db_file) -> list[str]:
     return db_file
 
 
-def list_tables(db_file):
-    """List the tables of an MDB file."""
+def list_tables(db_file: str | os.PathLike[str]) -> list[str]:
+    """
+    List the tables of an MDB file.
+
+    Parameters
+    ----------
+    db_file : str or os.PathLike
+        The database file.
+
+    Returns
+    -------
+    list of str
+        The list of tables.
+    """
     try:
         res = subprocess.run(  # noqa: S603
             ["mdb-tables", _validate_db_file(db_file)],
@@ -113,17 +140,23 @@ def list_tables(db_file):
     return res.stdout.lower().strip().split()
 
 
-def read_table(db_file: str | os.PathLike, tab: str | os.PathLike) -> xarray.Dataset:
-    """Read a MySQL table into an xarray object.
+def read_table(
+    db_file: str | os.PathLike[str], tab: str | os.PathLike
+) -> xarray.Dataset:
+    """
+    Read a MySQL table into an xarray object.
 
     Parameters
     ----------
-    db_file: str or os.PathLike
+    db_file : str or os.PathLike
+        The database file.
     tab : str or os.PathLike
+        The table to read.
 
     Returns
     -------
     xarray.Dataset
+        An xarray Dataset with the table data.
     """
     try:
         res = subprocess.run(  # noqa: S603
@@ -169,15 +202,18 @@ def read_table(db_file: str | os.PathLike, tab: str | os.PathLike) -> xarray.Dat
 
 
 def read_stations(db_file: str | os.PathLike) -> pd.DataFrame:
-    """Read station file using mdbtools.
+    """
+    Read station file using mdbtools.
 
     Parameters
     ----------
-    db_file: str or os.PathLike
+    db_file : str or os.PathLike
+        The database file.
 
     Returns
     -------
     pandas.DataFrame
+        A Pandas DataFrame with the station information.
     """
     try:
         res = subprocess.run(  # noqa: S603
@@ -195,6 +231,7 @@ def read_stations(db_file: str | os.PathLike) -> pd.DataFrame:
     except subprocess.CalledProcessError as e:
         msg = f"Calling mdb-export on {db_file} failed with code {e.returncode}: {e.stderr}"
         raise ValueError(msg)
+
     df = pd.read_csv(
         StringIO(res.stdout),
         parse_dates=["Date_Ouverture", "Date_Fermeture"],
@@ -226,19 +263,23 @@ def read_stations(db_file: str | os.PathLike) -> pd.DataFrame:
     da["station_type"] = da["station_type"].astype(str)
     da.station_opening.attrs.update(description="Date of station creation.")
     da.station_closing.attrs.update(description="Date of station closure.")
+
     return da.isel(station=~da.indexes["station"].duplicated())
 
 
-def read_definitions(db_file: str):
-    """Read variable definition file using mdbtools.
+def read_definitions(db_file: str) -> pd.DataFrame:
+    """
+    Read variable definition file using mdbtools.
 
     Parameters
     ----------
-    db_file: str
+    db_file : str
+        The database file.
 
     Returns
     -------
     pandas.DataFrame
+        The variable definitions.
     """
     try:
         res = subprocess.run(  # noqa: S603
@@ -276,21 +317,28 @@ def convert_mdb(
     output: str | Path,
     overwrite: bool = True,
 ) -> dict[tuple[str, str], Path]:
-    """Convert microsoft databases of MELCC observation data to xarray objects.
+    """
+    Convert microsoft databases of MELCCFP observation data to xarray objects.
 
     Parameters
     ----------
-    database: str or Path
-    stations
-    definitions
-    output
-    overwrite
+    database : str or Path
+        The database file.
+    stations : xr.Dataset
+        The station list.
+    definitions : xr.Dataset
+        The variable definitions.
+    output : str or Path
+        The output folder.
+    overwrite : bool
+        Whether to overwrite existing files. Default: True.
 
     Returns
     -------
     dict[tuple[str, str], Path]
+        The converted files.
     """
-    outs = dict()
+    outs = {}
     tables = list_tables(database)
     for tab in tables:
         if table.startswith("gdb") or tab.startswith("~"):
@@ -370,18 +418,24 @@ def convert_melcc_obs(
     output: str | Path | None = None,
     overwrite: bool = True,
 ) -> dict[tuple[str, str], Path]:
-    """Convert MELCC observation data to xarray data objects, returning paths.
+    """
+    Convert MELCCFP observation data to xarray data objects, returning paths.
 
     Parameters
     ----------
-    metafile: str or Path
-    folder: str or Path
-    output: str or Path, optional
-    overwrite: bool
+    metafile : str or Path
+        The metadata file.
+    folder : str or Path
+        The folder containing the MDB files.
+    output : str or Path, optional
+        The output folder. Default: None.
+    overwrite : bool
+        Whether to overwrite existing files. Default: True.
 
     Returns
     -------
-    dict[str, Path]
+    dict[(str, str), Path]
+        The converted files.
     """
     output = Path(output or ".")
 
@@ -397,19 +451,26 @@ def convert_melcc_obs(
 
 
 def concat(
-    files: Sequence[str | Path], output_folder: str | Path, overwrite: bool = True
+    files: Sequence[str | os.PathLike[str]],
+    output_folder: str | os.PathLike[str],
+    overwrite: bool = True,
 ) -> Path:
-    """Concatenate converted weather station files.
+    """
+    Concatenate converted weather station files.
 
     Parameters
     ----------
-    files: sequence of str or Path
-    output_folder: str or Path
-    overwrite: bool
+    files : Sequence of str or os.PathLike
+        The files to concatenate.
+    output_folder : str or os.PathLike
+        The output folder.
+    overwrite : bool
+        Whether to overwrite existing files. Default: True.
 
     Returns
     -------
     Path
+        The output path.
     """
     *vv, _, melcc, freq, _ = Path(files[0]).stem.split("_")
     vv = "_".join(vv)
@@ -479,17 +540,20 @@ def concat(
     return outpath
 
 
-def convert_snow_table(file: str | Path, output: str | Path):
-    """Convert snow data given through an Excel file.
+def convert_snow_table(
+    file: str | os.PathLike[str] | Path, output: str | os.PathLike[str] | Path
+) -> None:
+    """
+    Convert snow data given through an Excel file.
 
     This private data is not included in the MDB files.
 
     Parameters
     ----------
-    file : path
-      The excel file with sheets:  "Stations", "Périodes standards" and "Données"
-    output : path
-      Folder where to put the netCDF files (one for each of snd, sd and snw).
+    file : str or os.PathLike or Path
+        The Excel file with sheets: "Stations", "Périodes standards", and "Données".
+    output : str or os.PathLike or Path
+        Folder where to put the netCDF files (one for each of snd, sd and snw).
     """
     logging.info("Parsing stations.")
     stations = pd.read_excel(file, sheet_name="Stations")
diff --git a/src/miranda/convert/utils.py b/src/miranda/convert/utils.py
index ffd43edb..a60ea443 100644
--- a/src/miranda/convert/utils.py
+++ b/src/miranda/convert/utils.py
@@ -7,6 +7,7 @@
 import os
 import re
 from pathlib import Path
+from typing import Any
 
 import cftime
 import pandas as pd
@@ -19,25 +20,40 @@
 __all__ = ["date_parser", "find_version_hash"]
 
 
-def find_version_hash(file: os.PathLike | str) -> dict:
-    """Check for an existing version hash file and, if one cannot be found, generate one from file.
+def find_version_hash(file: str | os.PathLike[str]) -> dict[str, Any]:
+    """
+    Check for an existing version hash file and, if one cannot be found, generate one from file.
 
     Parameters
     ----------
     file : str or os.PathLike
+        The file to check.
 
     Returns
     -------
     dict
+        The version and hash.
     """
 
-    def _get_hash(f):
+    def _get_hash(f: str) -> str:
+        """Calculate the sha256sum of a file.
+
+        Parameters
+        ----------
+        f : str or os.PathLike
+            The file to hash.
+
+        Returns
+        -------
+        str
+            The hash.
+        """
         hash_sha256_writer = hashlib.sha256()
         with Path(f).open("rb", encoding="utf-8") as f_opened:
             hash_sha256_writer.update(f_opened.read())
         sha256sum = hash_sha256_writer.hexdigest()
-        msg = f"Calculated sha256sum (starting: {sha256sum[:6]})"
-        logging.info(msg)
+        _msg = f"Calculated sha256sum (starting: {sha256sum[:6]})"
+        logging.info(_msg)
         del hash_sha256_writer
         return sha256sum
 
@@ -75,7 +91,8 @@ def date_parser(
     output_type: str = "str",
     strftime_format: str = "%Y-%m-%d",
 ) -> str | pd.Timestamp | NaTType:
-    """Parses datetime objects from a string representation of a date or both a start and end date.
+    """
+    Parses datetime objects from a string representation of a date or both a start and end date.
 
     Parameters
     ----------
@@ -95,7 +112,7 @@ def date_parser(
 
     Notes
     -----
-    Adapted from code written by Gabriel Rondeau-Genesse (@RondeauG)
+    Adapted from code written by Gabriel Rondeau-Genesse (@RondeauG).
     """
     # Formats, ordered depending on string length
     formats = {
@@ -113,7 +130,22 @@ def date_parser(
     }
     end_date_found = False
 
-    def _parse_date(d, fmts):
+    def _parse_date(d: str, fmts: list[str]) -> tuple[pd.Timestamp, str]:
+        """
+        Parse the date.
+
+        Parameters
+        ----------
+        d : str
+            The date string.
+        fmts : list
+            The list of formats to try.
+
+        Returns
+        -------
+        pd.Timestamp
+            The parsed date.
+        """
         for fmt in fmts:
             try:
                 s = pd.to_datetime(d, format=fmt)

From 6da2b0a4a3cd5b0c9bc77947dd870f2ca9d5bf44 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Thu, 26 Sep 2024 14:49:28 -0400
Subject: [PATCH 05/10] docstring remainders

---
 src/miranda/convert/utils.py   |   5 +-
 src/miranda/io/utils.py        |  97 ++++++++++++++++++++++-------
 src/miranda/remote/archiver.py |  47 ++++++++++++--
 src/miranda/remote/connect.py  | 109 ++++++++++++++++++++++++++++++---
 src/miranda/remote/ops.py      |  60 +++++++++++++-----
 src/miranda/remote/remove.py   |  29 ++++++---
 6 files changed, 284 insertions(+), 63 deletions(-)

diff --git a/src/miranda/convert/utils.py b/src/miranda/convert/utils.py
index a60ea443..e01c0989 100644
--- a/src/miranda/convert/utils.py
+++ b/src/miranda/convert/utils.py
@@ -36,7 +36,8 @@ def find_version_hash(file: str | os.PathLike[str]) -> dict[str, Any]:
     """
 
     def _get_hash(f: str) -> str:
-        """Calculate the sha256sum of a file.
+        """
+        Calculate the sha256sum of a file.
 
         Parameters
         ----------
@@ -92,7 +93,7 @@ def date_parser(
     strftime_format: str = "%Y-%m-%d",
 ) -> str | pd.Timestamp | NaTType:
     """
-    Parses datetime objects from a string representation of a date or both a start and end date.
+    Parse datetime objects from a string representation of a date or both a start and end date.
 
     Parameters
     ----------
diff --git a/src/miranda/io/utils.py b/src/miranda/io/utils.py
index d2664aeb..00264ba4 100644
--- a/src/miranda/io/utils.py
+++ b/src/miranda/io/utils.py
@@ -8,6 +8,7 @@
 from collections.abc import Sequence
 from datetime import date
 from pathlib import Path
+from typing import Any
 
 import dask
 import netCDF4 as nc  # noqa
@@ -38,7 +39,8 @@
 def name_output_file(
     ds_or_dict: xr.Dataset | dict[str, str], output_format: str
 ) -> str:
-    """Name an output file based on facets within a Dataset or a dictionary.
+    """
+    Name an output file based on facets within a Dataset or a dictionary.
 
     Parameters
     ----------
@@ -50,6 +52,7 @@ def name_output_file(
     Returns
     -------
     str
+        The formatted filename.
 
     Notes
     -----
@@ -161,29 +164,39 @@ def delayed_write(
     target_chunks: dict | None = None,
     kwargs: dict | None = None,
 ) -> dask.delayed:
-    """Stage a Dataset writing job using `dask.delayed` objects.
+    """
+    Stage a Dataset writing job using `dask.delayed` objects.
 
     Parameters
     ----------
     ds : xr.Dataset
+        The Dataset to be written.
     outfile : str or os.PathLike
-    target_chunks : dict
+        The output file.
     output_format : {"netcdf", "zarr"}
+        The output format.
     overwrite : bool
+        Whether to overwrite existing files.
+        Default: False.
     encode : bool
+        Whether to encode the chunks. Not currently implemented.
+    target_chunks : dict
+        The target chunks for the output file.
     kwargs : dict
+        Additional keyword arguments.
 
     Returns
     -------
     dask.delayed.delayed
+        The delayed write job.
     """
     # Set correct chunks in encoding options
     if not kwargs:
-        kwargs = dict()
-    kwargs["encoding"] = dict()
+        kwargs = {}
+    kwargs["encoding"] = {}
     try:
         for name, da in ds.data_vars.items():
-            chunks = list()
+            chunks = []
             for dim in da.dims:
                 if target_chunks:
                     if dim in target_chunks.keys():
@@ -220,8 +233,22 @@ def delayed_write(
     return getattr(ds, f"to_{output_format}")(outfile, **kwargs)
 
 
-def get_time_attrs(file_or_dataset: str | os.PathLike | xr.Dataset) -> (str, int):
-    """Determine attributes related to time dimensions."""
+def get_time_attrs(
+    file_or_dataset: str | os.PathLike[str] | xr.Dataset,
+) -> tuple[str, int]:
+    """
+    Determine attributes related to time dimensions.
+
+    Parameters
+    ----------
+    file_or_dataset : str or os.PathLike or xr.Dataset
+        The file or dataset to be examined.
+
+    Returns
+    -------
+    tuple
+        The calendar and time.
+    """
     if isinstance(file_or_dataset, (str, Path)):
         ds = xr.open_dataset(Path(file_or_dataset).expanduser())
     else:
@@ -234,9 +261,21 @@ def get_time_attrs(file_or_dataset: str | os.PathLike | xr.Dataset) -> (str, int
 
 
 def get_global_attrs(
-    file_or_dataset: str | os.PathLike | xr.Dataset,
+    file_or_dataset: str | os.PathLike[str] | xr.Dataset,
 ) -> dict[str, str | int]:
-    """Collect global attributes from NetCDF, Zarr, or Dataset object."""
+    """
+    Collect global attributes from NetCDF, Zarr, or Dataset object.
+
+    Parameters
+    ----------
+    file_or_dataset : str or os.PathLike or xr.Dataset
+        The file or dataset to be examined.
+
+    Returns
+    -------
+    dict
+        The global attributes.
+    """
     if isinstance(file_or_dataset, (str, Path)):
         file = Path(file_or_dataset).expanduser()
     elif isinstance(file_or_dataset, xr.Dataset):
@@ -260,24 +299,31 @@ def get_global_attrs(
 
 
 def sort_variables(
-    files: list[Path], variables: Sequence[str]
+    files: list[str | os.PathLike[str] | Path], variables: Sequence[str] | None
 ) -> dict[str, list[Path]]:
-    """Sort all variables within supplied files for treatment.
+    """
+    Sort all variables within supplied files for treatment.
 
     Parameters
     ----------
-    files: list of Path
-    variables: sequence of str
+    files : list of str or os.PathLike or Path
+        The files to be sorted.
+    variables : sequence of str, optional
+        The variables to be sorted.
+        If not provided, all variables will be grouped.
 
     Returns
     -------
     dict[str, list[Path]]
+        Files sorted by variables.
     """
-    variable_sorted = dict()
+    variable_sorted = {}
     if variables:
         logging.info("Sorting variables into groups. This could take some time.")
         for variable in variables:
-            var_group = [file for file in files if file.name.startswith(variable)]
+            var_group = [
+                Path(file) for file in files if Path(file).name.startswith(variable)
+            ]
             if not var_group:
                 msg = f"No files found for {variable}. Continuing..."
                 logging.warning(msg)
@@ -289,19 +335,21 @@ def sort_variables(
     return variable_sorted
 
 
-def get_chunks_on_disk(file: os.PathLike | str) -> dict:
-    """Determine the chunks on disk for a given NetCDF or Zarr file.
+def get_chunks_on_disk(file: str | os.PathLike[str] | Path) -> dict[str, int]:
+    """
+    Determine the chunks on disk for a given NetCDF or Zarr file.
 
     Parameters
     ----------
-    file : str or os.PathLike
+    file : str or os.PathLike or Path
         File to be examined. Supports NetCDF and Zarr.
 
     Returns
     -------
     dict
+        The chunks on disk.
     """
-    chunks = dict()
+    chunks = {}
     file = Path(file)
 
     if file.suffix.lower() in [".nc", ".nc4"]:
@@ -322,18 +370,21 @@ def get_chunks_on_disk(file: os.PathLike | str) -> dict:
     return chunks
 
 
-def creation_date(path_to_file: str | os.PathLike) -> float | date:
-    """Return the date that a file was created, falling back to when it was last modified if unable to determine.
+def creation_date(path_to_file: str | os.PathLike[str] | Path) -> float | date:
+    """
+    Return the date that a file was created, falling back to when it was last modified if unable to determine.
 
     See https://stackoverflow.com/a/39501288/1709587 for explanation.
 
     Parameters
     ----------
-    path_to_file : str or os.PathLike
+    path_to_file : str or os.PathLike or Path
+        The file to be examined.
 
     Returns
     -------
     float or date
+        The creation date.
     """
     if os.name == "nt":
         return Path(path_to_file).stat().st_ctime
diff --git a/src/miranda/remote/archiver.py b/src/miranda/remote/archiver.py
index f7b1978d..2b82f420 100644
--- a/src/miranda/remote/archiver.py
+++ b/src/miranda/remote/archiver.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging.config
+import os
 from collections import defaultdict
 from pathlib import Path
 
@@ -25,9 +26,9 @@
 
 
 def archive_database(
-    source: Path | str | list,
-    common_path: Path | str,
-    destination: Path | str,
+    source: Path | str | os.PathLike[str] | list[str | os.PathLike[str] | Path],
+    common_path: Path | str | os.PathLike[str],
+    destination: Path | str | os.PathLike[str],
     file_suffixes: str = ".nc",
     server: str | None = None,
     username: str | None = None,
@@ -40,8 +41,44 @@ def archive_database(
     dry_run: bool = False,
 ) -> None:
     """
+    Archive database files to a remote server.
+
     Given a source, destination, and dependent on file size limit, create tarfile archives and transfer
-     files to another server for backup purposes
+    files to another server for backup purposes.
+
+    Parameters
+    ----------
+    source : Path or str or os.PathLike or list
+        The source directory containing the files to archive.
+    common_path : Path or str or os.PathLike
+        The common path to use for grouping files.
+    destination : Path or str or os.PathLike
+        The destination directory to save the files.
+    file_suffixes : str
+        The file suffix to use for filtering files.
+    server : str, optional
+        The server to connect to.
+    username : str, optional
+        The username to use for the connection.
+    project_name : str, optional
+        The project name to use for the files.
+    overwrite : bool, optional
+        Whether to overwrite existing files.
+    compression : bool, optional
+        Whether to compress the files.
+    recursive : bool, optional
+        Whether to search for files recursively.
+    use_grouping : bool, optional
+        Whether to group files by date.
+    use_subdirectories : bool, optional
+        Whether to use subdirectories for grouping.
+    dry_run : bool, optional
+        Whether to run in dry-run mode.
+
+    Raises
+    ------
+    RuntimeError
+        If the transfer fails.
     """
     project = "{project_name}_{group_name}_{common_date}{part}.{suffix}"
 
@@ -146,7 +183,7 @@ def archive_database(
 
         msg = (
             f"Transferred {len(successful_transfers)} "
-            f"of { len([f for f in file_list])} files "
+            f"of {len([f for f in file_list])} files "
             f"totalling {report_file_size(successful_transfers)}."
         )
         logging.info(msg)
diff --git a/src/miranda/remote/connect.py b/src/miranda/remote/connect.py
index 4b2221e9..b91eb0ff 100644
--- a/src/miranda/remote/connect.py
+++ b/src/miranda/remote/connect.py
@@ -1,12 +1,14 @@
 """Remote Connection Operations module."""
 
+# FIXME: This module should be moved to its own package for licensing reasons.
+
 from __future__ import annotations
 
 import logging.config
 import warnings
 from getpass import getpass
 from pathlib import Path
-from typing import Optional
+from typing import Any
 
 from miranda.scripting import LOGGING_CONFIG
 
@@ -27,7 +29,31 @@
 
 
 class Connection:
-    """Connection contextualise class."""
+    r"""
+    Connection contextualise class.
+
+    Parameters
+    ----------
+    username : str or Path, optional
+        The username to use for the connection.
+    host : str or Path, optional
+        The host URL to connect to.
+    protocol : str, optional
+        The protocol to use for the connection.
+    \*args : list
+        Additional arguments.
+    \*\*kwargs : dict
+        Additional keyword arguments.
+
+    Raises
+    ------
+    ValueError
+        When the protocol is not "sftp" or "scp".
+
+    Warnings
+    --------
+    Credentials are not encrypted.
+    """
 
     def __init__(
         self,
@@ -37,6 +63,27 @@ def __init__(
         *args,
         **kwargs,
     ):
+        r"""
+        Initialise the connection object.
+
+        Parameters
+        ----------
+        username : str or Path, optional
+            The username to use for the connection.
+        host : str or Path, optional
+            The host URL to connect to.
+        protocol : str, optional
+            The protocol to use for the connection.
+        \*args : list
+            Additional arguments.
+        \*\*kwargs : dict
+            Additional keyword arguments.
+
+        Raises
+        ------
+        ValueError
+            When the protocol is not "sftp" or "scp".
+        """
         self.user = username or input("Enter username: ")
         self.host = host or input("Enter host URL: ")
         self._args = list(*args)
@@ -48,8 +95,14 @@ def __init__(
         else:
             raise ValueError('Protocol must be "sftp" or "scp".')
 
-    def update(self, **kwargs):
-        """Update connection keyword arguments.
+    def update(self, **kwargs: dict[str, Any]):
+        r"""
+        Update connection keyword arguments.
+
+        Parameters
+        ----------
+        \*\*kwargs : dict
+            The keyword arguments to update.
 
         Warnings
         --------
@@ -57,19 +110,56 @@ def update(self, **kwargs):
         """
         self._kwargs = kwargs
 
-    def __call__(self, **kwargs):
-        """Update keyword arguments on call."""
+    def __call__(self, **kwargs: dict[str, Any]):
+        r"""
+        Update keyword arguments on call.
+
+        Parameters
+        ----------
+        \*\*kwargs : dict
+            The keyword arguments to update.
+
+        Returns
+        -------
+        Connection
+            The updated connection object.
+        """
         self.update(**kwargs)
         return self
 
-    def __str__(self):  # noqa: D105
+    def __str__(self):
+        """
+        The string representation of the connection.
+
+        Returns
+        -------
+        str
+            The connection string.
+        """
         return f"Connection to {self.host} as {self.user}"
 
     def __repr__(self):  # noqa: D105
         return f"<{self.__class__.__module__}.{self.__class__.__name__} object at {hex(id(self))}>"
 
-    def connect(self, **kwargs):
-        """Connect to a remote server with credential prompts."""
+    def connect(self, **kwargs: dict[str, Any]):
+        r"""
+        Connect to a remote server with credential prompts.
+
+        Parameters
+        ----------
+        \*\*kwargs : dict
+            Additional keyword arguments.
+
+        Returns
+        -------
+        fabric.Connection or SCPClient
+            The connection object.
+
+        Raises
+        ------
+        Exception
+            If the connection fails.
+        """
         try:
             keywords = (
                 dict(**kwargs)
@@ -91,6 +181,7 @@ def connect(self, **kwargs):
                 self.__c = SCPClient(c.get_transport())
 
             return self.__c
+        # FIXME: This is too broad.
         except Exception as e:
             raise e
 
diff --git a/src/miranda/remote/ops.py b/src/miranda/remote/ops.py
index 4a29210f..508524fc 100644
--- a/src/miranda/remote/ops.py
+++ b/src/miranda/remote/ops.py
@@ -1,5 +1,7 @@
 """Remote Operations module."""
 
+# FIXME: This module should be moved to its own package for licensing reasons.
+
 from __future__ import annotations
 
 import logging.config
@@ -28,19 +30,23 @@
 
 
 def create_remote_directory(
-    directory: str | os.PathLike,
+    directory: str | os.PathLike[str] | Path,
     transport: SSHClient | fabric.Connection | miranda.remote.Connection,
 ) -> None:
-    """Call "mkdir -p" function to create a folder structure over SFTP/SSH and wait for confirmation before continuing.
+    """
+    Call "mkdir -p" function to create a folder structure over SFTP/SSH and wait for confirmation before continuing.
 
     Parameters
     ----------
-    directory :  str or os.PathLike
+    directory : str or os.PathLike or Path
+        The directory to create.
     transport : SSHClient or fabric.Connection or miranda.remote.Connection
+        The transport to use.
 
-    Returns
-    -------
-    None
+    Raises
+    ------
+    ConnectionError
+        When the transport is not a valid connection.
     """
     if isinstance(directory, str):
         directory = Path(directory)
@@ -65,8 +71,8 @@ def create_remote_directory(
 
 
 def create_archive(
-    source_files: list[str | os.PathLike],
-    destination: str | os.PathLike,
+    source_files: list[str | os.PathLike[str] | Path],
+    destination: str | os.PathLike[str],
     transport: (
         SCPClient | SFTPClient | fabric.Connection | miranda.remote.Connection | None
     ) = None,
@@ -74,20 +80,28 @@ def create_archive(
     compression: bool = False,
     recursive: bool = True,
 ) -> None:
-    """Create an archive from source files and transfer to another location (remote or local).
+    """
+    Create an archive from source files and transfer to another location (remote or local).
 
     Parameters
     ----------
     source_files : list of str or os.PathLike
+        The source files to archive.
     destination : str or os.PathLike
+        The destination directory to save the archive.
     transport : SCPClient or SFTPClient or fabric.Connection or miranda.remote.Connection, optional
+        The transport to use.
     delete : bool
+        Whether to delete the temporary file. Default: True.
     compression : bool
+        Whether to compress the archive. Default: False.
     recursive : bool
+        Whether to search for files recursively. Default: True.
 
-    Returns
-    -------
-    None
+    Raises
+    ------
+    ValueError
+        If the compression value is invalid.
     """
     if compression:
         write = "w:gz"
@@ -113,23 +127,35 @@ def create_archive(
 
 
 def transfer_file(
-    source_file: str | os.PathLike,
-    destination_file: str | os.PathLike,
+    source_file: str | os.PathLike[str] | Path,
+    destination_file: str | os.PathLike[str] | Path,
     transport: (
         SCPClient | SFTPClient | fabric.Connection | miranda.remote.Connection | None
     ) = None,
 ) -> bool:
-    """Transfer file from one location (remote or local) to another.
+    """
+    Transfer file from one location (remote or local) to another.
 
     Parameters
     ----------
-    source_file : str or os.PathLike
-    destination_file : str or os.PathLike
+    source_file : str or os.PathLike or Path
+        The source file to transfer.
+    destination_file : str or os.PathLike or Path
+        The destination file to transfer to.
     transport : SCPClient or SFTPClient or fabric.Connection or miranda.remote.Connection, optional
+        The transport to use.
 
     Returns
     -------
     bool
+        Whether the transfer was successful.
+
+    Raises
+    ------
+    SCPException
+        If the SCP transfer fails.
+    SSHException
+        If the SSH connection fails.
     """
     source_file = Path(source_file)
     destination_file = Path(destination_file)
diff --git a/src/miranda/remote/remove.py b/src/miranda/remote/remove.py
index 8e7e200e..b0c2270e 100644
--- a/src/miranda/remote/remove.py
+++ b/src/miranda/remote/remove.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging.config
+import os
 import warnings
 from datetime import date
 from getpass import getpass
@@ -39,7 +40,7 @@ def file_emptier(*, file_list: list[str | Path] | GeneratorType) -> None:
     Parameters
     ----------
     file_list : list of str or Path, or GeneratorType
-        List of files to be overwritten
+        List of files to be overwritten.
     """
     file_list = sorted([Path(f) for f in file_list])
 
@@ -55,7 +56,7 @@ def file_emptier(*, file_list: list[str | Path] | GeneratorType) -> None:
 # FIXME: This function is terribly insecure. It should be refactored to use a more secure method of authentication.
 def delete_by_date(
     *,
-    source: str | Path,
+    source: str | Path | os.PathLike[str],
     year: int | None = None,
     month: int | None = None,
     day: int | None = None,
@@ -70,15 +71,29 @@ def delete_by_date(
 
     Parameters
     ----------
-    source : str or Path
+    source : str or Path or os.PathLike
+        The source directory to search.
     year : int, optional
+        The year to search for.
     month : int, optional
+        The month to search for.
     day : int, optional
+        The day to search for.
     pattern : str, optional
+        The file pattern to search for.
     server : str or Path, optional
+        The server address.
     user : str, optional
+        The username.
     password : str, optional
+        The password.
     date_object : date, optional
+        The date object to search for.
+
+    Raises
+    ------
+    ValueError
+        If no date is provided.
     """
     user = user or input("Username:")
     password = password or getpass("Password:")
@@ -150,11 +165,11 @@ def delete_duplicates(
         The target directory to compare against.
     server : str or Path, optional
         The server address.
-    user: str
+    user : str
         The username.
     password : str
         The password.
-    pattern: str
+    pattern : str
         The file pattern to search for.
     delete_target_duplicates : bool
         Whether to delete the duplicates in the target directory.
@@ -212,7 +227,7 @@ def delete_by_variable(
     Delete according to variable name.
 
     Given target location(s), a list of variables and a server address, perform a glob search
-    and delete file names starting with the variables identified
+    and delete file names starting with the variables identified.
 
     Parameters
     ----------
@@ -229,7 +244,7 @@ def delete_by_variable(
     file_suffix : str, optional
         The file suffix to search for.
     delete : bool
-        Whether to delete the files
+        Whether to delete the files.
     """
     user = user or input("Username:")
     password = password or getpass("Password:")

From 05d8f16f6e142cd7e0a4446062adc4fd042e8d22 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Thu, 26 Sep 2024 14:56:31 -0400
Subject: [PATCH 06/10] fix tests

---
 tests/test_miranda.py | 68 +++++++++++++++++++++----------------------
 tests/test_utils.py   |  8 +++--
 2 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/tests/test_miranda.py b/tests/test_miranda.py
index 229fdb2d..8318393e 100755
--- a/tests/test_miranda.py
+++ b/tests/test_miranda.py
@@ -26,40 +26,40 @@ def teardown_class(cls):
         pass
 
 
-class TestDatabase:
-    def test_create_database(self):
-        common = Path(__file__).parent
-        db = miranda.DataBase(common)
-
-        assert len(db) == 3
-        assert str(db.__dict__["_common_path"]).endswith("tests/data/cmip5")
-
-    def test_dict_funcs(self):
-        common = Path(__file__).parent
-        db = miranda.DataBase(common)
-
-        true_keys = set(db.__dict__.keys())
-        assert {"_files", "_is_server", "_source", "_destination"}.issubset(true_keys)
-
-        keys = set(db.keys())
-        assert {
-            "project_name",
-            "recursive",
-            "successful_transfers",
-            "file_suffixes",
-        }.issubset(keys)
-        assert not {"_files", "_is_server", "_source", "_destination"}.issubset(keys)
-
-    def test_url_validator(self):
-        common = Path(__file__).parent
-        db = miranda.DataBase(common)
-
-        url = "https://www.google.ca"
-        short_url = "http://bit.ly/1a2b3c4d5e"
-        not_url = "htttp://not-a-url.biz"
-        assert db._url_validate(url)
-        assert db._url_validate(short_url)
-        assert not db._url_validate(not_url)
+# class TestDatabase:
+#     def test_create_database(self):
+#         common = Path(__file__).parent
+#         db = miranda.DataBase(common)
+#
+#         assert len(db) == 3
+#         assert str(db.__dict__["_common_path"]).endswith("tests/data/cmip5")
+#
+#     def test_dict_funcs(self):
+#         common = Path(__file__).parent
+#         db = miranda.DataBase(common)
+#
+#         true_keys = set(db.__dict__.keys())
+#         assert {"_files", "_is_server", "_source", "_destination"}.issubset(true_keys)
+#
+#         keys = set(db.keys())
+#         assert {
+#             "project_name",
+#             "recursive",
+#             "successful_transfers",
+#             "file_suffixes",
+#         }.issubset(keys)
+#         assert not {"_files", "_is_server", "_source", "_destination"}.issubset(keys)
+#
+#     def test_url_validator(self):
+#         common = Path(__file__).parent
+#         db = miranda.DataBase(common)
+#
+#         url = "https://www.google.ca"
+#         short_url = "http://bit.ly/1a2b3c4d5e"
+#         not_url = "htttp://not-a-url.biz"
+#         assert db._url_validate(url)
+#         assert db._url_validate(short_url)
+#         assert not db._url_validate(not_url)
 
 
 def test_package_metadata():
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 8cdddeae..c525f821 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -78,11 +78,13 @@ def test_daily_cf_dictionaries(self):
 
 
 class TestCreationDate:
-    def test_newly_created_file(self, temp_filename):
-        with temp_filename.open("w") as f:
+    def test_newly_created_file(self, tmp_path):
+        file = tmp_path.joinpath("new_file.txt")
+
+        with file.open("w") as f:
             f.write("Hello, world!")
 
-        assert miranda.io.utils.creation_date(temp_filename) == date.today()
+        assert miranda.io.utils.creation_date(file) == date.today()
 
 
 class TestReadPrivileges:

From 4fc285bc6ba537efde9cde93c082158653a30984 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Thu, 26 Sep 2024 15:02:26 -0400
Subject: [PATCH 07/10] update CHANGELOG.rst

---
 CHANGELOG.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5b0d40ca..7c166ec2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -25,6 +25,7 @@ Bug fixes
 Internal changes
 ^^^^^^^^^^^^^^^^
 * `miranda` now has a security policy (`SECURITY.md`) for disclosing sensitive issues using secure communication channels. This has also been added to the documentation.
+* `miranda` now applies the `numpydoc` documentation style to all publicly-exposed docstrings.
 
 .. _changes_0.5.0:
 

From d3f73d03ad05c6a3799e9f40492cd1b49ca9ab39 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Thu, 26 Sep 2024 15:02:55 -0400
Subject: [PATCH 08/10] Drop Python3.8 in CI

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index eb83c44a..2c11c82d 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -60,7 +60,7 @@ jobs:
     strategy:
       matrix:
         os: [ 'ubuntu-latest' ]
-        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] # "3.13"
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ] # "3.13"
     steps:
       - name: Harden Runner
         uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1

From 805ae99ed3cfc3246280440667ef12b18de50ea2 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Fri, 27 Sep 2024 17:04:57 -0400
Subject: [PATCH 09/10] pin micromamba version

---
 .github/workflows/main.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 2c11c82d..5d0ae6b4 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -120,15 +120,13 @@ jobs:
           environment-file: environment-dev.yml
           create-args: >-
             python=${{ matrix.python-version }}
-      - name: Conda and Mamba versions
-        run: |
-          echo "micromamba $(micromamba --version)"
+          micromamba-version: 1.5.10-0 # Pin micromamba version because of following issue: https://github.com/mamba-org/setup-micromamba/issues/225
       - name: Install miranda
         run: |
           python -m pip install --no-deps .
       - name: Check versions
         run: |
-          conda list
+          micromamba list
           python -m pip check || true
       - name: Test with pytest
         run: |

From ad5e78adc502ea533752203c83495e333220b212 Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Fri, 27 Sep 2024 17:11:06 -0400
Subject: [PATCH 10/10] fix docs

---
 docs/installation.rst | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/docs/installation.rst b/docs/installation.rst
index 8204b349..e6f4fbe0 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -12,11 +12,6 @@ Installation
             conda install -n base conda-libmamba-solver
             conda config --set solver libmamba
 
-If you don't have `pip`_ installed, this `Python installation guide`_ can guide you through the process.
-
-.. _pip: https://pip.pypa.io
-.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
-
 Stable release
 --------------
 
@@ -110,14 +105,4 @@ The sources for miranda can be downloaded from the `Github repo`_.
 
     These commands should work most of the time, but if big changes are made to the repository, you might need to remove the environment and create it again.
 
-
 .. _Github repo: https://github.com/Zeitsperre/miranda
-
-Creating a Conda environment
-----------------------------
-
-To create a conda development environment including all miranda dependencies, enter the following command from within your cloned repo::
-
-    $ conda create -n my_miranda_env python=3.9 --file=environment.yml
-    $ conda activate my_miranda_env
-    $ pip install -e .[dev]