diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ffe6baa..637e195 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,7 +41,7 @@ repos: exclude_types: ["jupyter", "text"] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.3 + rev: v0.8.1 hooks: - id: ruff args: ["--fix", "--exit-non-zero-on-fix"] diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f8abf3..46b9949 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file. The format is based on [Common Changelog](https://common-changelog.org/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.12.0] - 2024-12-20 + +### Changed + +- Clean up docstrings, especially removing types that are already annotated in function signature ([#274](https://github.com/nasa/ncompare/issues/274)) ([**@danielfromearth**](https://github.com/danielfromearth)) + +### Added + +- Categorize counts of differences (including attributes) in a summary ([#276](https://github.com/nasa/ncompare/pull/276)) ([**@danielfromearth**](https://github.com/danielfromearth)) +- Include dimensions in variable attribute comparisons ([#277](https://github.com/nasa/ncompare/pull/277)) ([**@danielfromearth**](https://github.com/danielfromearth)) +- Provide numerical output where zero means no differences found ([#278](https://github.com/nasa/ncompare/pull/278)) ([**@danielfromearth**](https://github.com/danielfromearth)) + +### Removed + +- **Breaking:** drop support for randomized value checks, which are no longer part of API ([#271](https://github.com/nasa/ncompare/pull/271)) ([**@danielfromearth**](https://github.com/danielfromearth)) + +### Fixed + +- Catch "unsupported datatype" exception from netCDF library ([#268](https://github.com/nasa/ncompare/pull/268)) ([**@danielfromearth**](https://github.com/danielfromearth)) + ## [1.11.0] - 2024-11-14 ### Added diff --git a/README.md b/README.md 
index 57a0834..3233110 100644 --- a/README.md +++ b/README.md @@ -32,34 +32,27 @@ _____ DOI badge -Compare the structure of two NetCDF files at the command line. +Compare the structure of two NetCDF files at the command line or via Python. `ncompare` generates a view of the matching and non-matching groups and variables between two NetCDF datasets. ## Installing -The latest release of `ncompare` can be installed with `mamba`, `conda` or `pip`. - -#### Using `mamba` +The latest release of `ncompare` can be installed with `mamba`, `conda` or `pip`: ```bash mamba install -c conda-forge ncompare ``` - -#### Using `conda` - ```bash conda install -c conda-forge ncompare ``` - -#### Using `pip` - ```bash pip install ncompare ``` -## Usage +## Usage Examples +### At a command line: To compare two netCDF files, pass the filepaths for each of the two netCDF files directly to ncompare, as follows: @@ -77,23 +70,22 @@ a common use of _ncompare_ may look like this example: ncompare S001G01.nc S001G01_SUBSET.nc --file-text subset_comparison.txt ``` -**A more complete usage demonstration with example output is shown in -[this example notebook](https://ncompare.readthedocs.io/en/latest/example/ncompare-example-usage/).** - -### Options - -- `-h`, `--help` : Show this help message and exit. -- `--file-text` [FILE_PATH]: Text file to write output to. -- `--file-csv` [FILE_PATH]: Comma-separated values (CSV) file to write output to. -- `--file-xlsx` [FILE_PATH]: Excel file to write output to. -- `--only-diffs` : Only display variables and attributes that are different -- `--no-color` : Turn off all colorized output. -- `--show-attributes` : Include variable attributes in the table that compares variables. -- `--show-chunks` : Include chunk sizes in the table that compares variables. -- `-v` (`--comparison_var_name`) [VAR_NAME]: Compare specific values for this variable. -- `-g` (`--comparison_var_group`) [VAR_GROUP]: Group that contains the `comparison_var_name`. 
-- `--column-widths` [WIDTH, WIDTH, WIDTH]: Width, in number of characters, of the three columns in the comparison report -- `--version` : Show the current version and then exit. +### In a Python kernel: + +```python +from ncompare import compare + +total_number_of_differences = compare( + "<netCDF file #1>", + "<netCDF file #2>", + only_diffs=True, + show_attributes=True, + show_chunks=True, +) +``` + + +### More complete usage demonstrations, with example output, are shown in [this example notebook](https://ncompare.readthedocs.io/en/latest/example/ncompare-example-usage/). ## Contributing diff --git a/docs/example/ncompare-example-usage.ipynb b/docs/example/ncompare-example-usage.ipynb index 1fa7d2d..689cf5b 100644 --- a/docs/example/ncompare-example-usage.ipynb +++ b/docs/example/ncompare-example-usage.ipynb @@ -5,7 +5,7 @@ "id": "214b2e0a-4a8a-48bb-b1f5-b457b69ece57", "metadata": {}, "source": [ - "# Brief demonstration of `ncompare`: to compare the structure, groups, variables, and attributes of two netCDF files\"" + "# Brief demonstration of ncompare: comparing the structure, groups, variables, and attributes of two netCDF files" ] }, { @@ -23,6 +23,14 @@ "cell_type": "markdown", "id": "569c088b-0929-43c3-8d0f-6da3b6c89cce", "metadata": {}, + "source": [ + "# Command Line Usage" + ] + }, + { + "cell_type": "markdown", + "id": "14790bf9-9504-4823-9370-db738fe29355", + "metadata": {}, "source": [ "## `ncompare`'s command line arguments, provided by the `--help` description" ] @@ -41,16 +49,20 @@ "cell_type": "code", "execution_count": 1, "id": "07e397b3-4964-4a90-b7f5-ae35185f86e5", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:10.236545Z", + "start_time": "2024-12-13T19:23:09.323920Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "usage: ncompare [-h] [-v COMPARISON_VAR_NAME] [-g COMPARISON_VAR_GROUP]\n", - " [--only-diffs] [--file-text FILE_TEXT] [--file-csv FILE_CSV]\n", - " [--file-xlsx FILE_XLSX] [--no-color] 
[--show-attributes]\n", - " [--show-chunks]\n", + "usage: ncompare [-h] [--only-diffs] [--file-text FILE_TEXT]\n", + " [--file-csv FILE_CSV] [--file-xlsx FILE_XLSX] [--no-color]\n", + " [--show-attributes] [--show-chunks]\n", " [--column-widths COLUMN_WIDTHS COLUMN_WIDTHS COLUMN_WIDTHS]\n", " [--version]\n", " nc_a nc_b\n", @@ -59,14 +71,10 @@ "\n", "positional arguments:\n", " nc_a First NetCDF file\n", - " nc_b First NetCDF file\n", + " nc_b Second NetCDF file\n", "\n", "options:\n", " -h, --help show this help message and exit\n", - " -v COMPARISON_VAR_NAME, --comparison_var_name COMPARISON_VAR_NAME\n", - " Comparison variable name\n", - " -g COMPARISON_VAR_GROUP, --comparison_var_group COMPARISON_VAR_GROUP\n", - " Comparison variable group\n", " --only-diffs Only display variables and attributes that are\n", " different\n", " --file-text FILE_TEXT\n", @@ -115,7 +123,12 @@ "cell_type": "code", "execution_count": 2, "id": "136bbeb8-6d74-4373-8ef7-1c20c1fe6afc", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:10.245010Z", + "start_time": "2024-12-13T19:23:10.242690Z" + } + }, "outputs": [], "source": [ "from pathlib import Path\n", @@ -141,7 +154,12 @@ "cell_type": "code", "execution_count": 3, "id": "10a025b9-4483-4925-873e-6653b64441e3", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:11.688286Z", + "start_time": "2024-12-13T19:23:10.323624Z" + } + }, "outputs": [], "source": [ "import requests\n", @@ -175,7 +193,12 @@ "cell_type": "code", "execution_count": 4, "id": "43cace42-aa55-469e-84d9-13a45115267e", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:12.154657Z", + "start_time": "2024-12-13T19:23:11.695335Z" + } + }, "outputs": [ { "name": "stdout", @@ -190,8 +213,6 @@ "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "Root-level Groups:\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\t\u001b[36mAre all items the same? ---> True. 
(No items exist.)\u001b[0m\n", - "\u001b[0m\u001b[37m\u001b[0m\u001b[90m\n", - "No variable group selected for comparison. Skipping..\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "All variables:\u001b[0m\n", "\u001b[0m File A File B\u001b[0m\n", @@ -203,33 +224,47 @@ "\u001b[0m - -------------------------- --------------------------\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lat_bounds lat_bounds\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('latitude', 'nv') ('latitude', 'nv')\u001b[0m\n", "\u001b[0m shape: (72, 2) (72, 2)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: latitude latitude\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('latitude',) ('latitude',)\u001b[0m\n", "\u001b[0m shape: (72,) (72,)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lon_bounds lon_bounds\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('longitude', 'nv') ('longitude', 'nv')\u001b[0m\n", "\u001b[0m shape: (144, 2) (144, 2)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: longitude longitude\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('longitude',) ('longitude',)\u001b[0m\n", "\u001b[0m shape: (144,) (144,)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: precip precip\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('time', 'latitude', 'longitude') ('time', 'latitude', 'longitude')\u001b[0m\n", "\u001b[0m shape: (1, 72, 144) (1, 72, 144)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: precip_error precip_error\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('time', 'latitude', 'longitude') ('time', 'latitude', 'longitude')\u001b[0m\n", "\u001b[0m shape: (1, 72, 144) (1, 72, 144)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: time time\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('time',) ('time',)\u001b[0m\n", "\u001b[0m 
shape: (1,) (1,)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: time_bounds time_bounds\u001b[0m\n", "\u001b[0m dtype: float32 float32\u001b[0m\n", + "\u001b[0m dimensions: ('time', 'nv') ('time', 'nv')\u001b[0m\n", "\u001b[0m shape: (1, 2) (1, 2)\u001b[0m\n", "\u001b[0m - -------------------------- --------------------------\u001b[0m\n", - "\u001b[0m Total number of shared items: 8 8\u001b[0m\n", - "\u001b[0m Total number of non-shared items: 0 0\u001b[0m\n", + "\u001b[0m SUMMARY -------------------------- --------------------------\u001b[0m\n", + "\u001b[0m Total # of shared variables: 8 8\u001b[0m\n", + "\u001b[0m Total # of non-shared variables: 0 0\u001b[0m\n", + "\u001b[0m Total # of shared groups: 0 0\u001b[0m\n", + "\u001b[0m Total # of non-shared groups: 0 0\u001b[0m\n", + "\u001b[0m Total # of shared attributes: 24 24\u001b[0m\n", + "\u001b[0m Total # of non-shared attributes: 0 0\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\n", "Done.\u001b[0m\n", + "\u001b[0m0\u001b[0m\n", "\u001b[0m\u001b[0m" ] } @@ -251,7 +286,12 @@ "cell_type": "code", "execution_count": 5, "id": "c48728a0-1379-4a05-b7e6-ad50694510df", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:12.577663Z", + "start_time": "2024-12-13T19:23:12.161049Z" + } + }, "outputs": [ { "name": "stdout", @@ -261,8 +301,6 @@ "\u001b[0m\u001b[37m\u001b[0mFile B: PERSIANN-CDR_v01r01_20230419_c20231030.nc\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "Root-level Dimensions:\u001b[0m\n", - "/usr/local/Caskroom/miniconda/base/envs/ncompare-jupyter-example/lib/python3.12/site-packages/xarray/conventions.py:428: SerializationWarning: variable 'precipitation' has multiple fill values {-9999.0, -1.0}, decoding all values to NaN.\n", - " new_vars[k] = decode_cf_variable(\n", "\u001b[0m\u001b[37m\u001b[0m\tAre all items the same? ---> \u001b[31mFalse. 
(2 items are shared, out of 6 total.)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\t\u001b[31mWhich items are different?\u001b[0m\n", "\u001b[0m File A File B\u001b[0m\n", @@ -276,8 +314,6 @@ "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "Root-level Groups:\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\t\u001b[36mAre all items the same? ---> True. (No items exist.)\u001b[0m\n", - "\u001b[0m\u001b[37m\u001b[0m\u001b[90m\n", - "No variable group selected for comparison. Skipping..\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "All variables:\u001b[0m\n", "\u001b[0m File A File B\u001b[0m\n", @@ -289,49 +325,72 @@ "\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lat\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat',)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480,)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lat_bnds\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat', 'nv')\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480, 2)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lat_bounds \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude', 'nv') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72, 2) \u001b[0m\n", "\u001b[0m -----VARIABLE-----: latitude \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude',) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72,) \u001b[0m\n", "\u001b[0m -----VARIABLE-----: lon\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon',)\u001b[0m\n", 
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440,)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lon_bnds\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon', 'nv')\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440, 2)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lon_bounds \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude', 'nv') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144, 2) \u001b[0m\n", "\u001b[0m -----VARIABLE-----: longitude \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude',) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144,) \u001b[0m\n", "\u001b[0m -----VARIABLE-----: precip \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n", "\u001b[0m -----VARIABLE-----: precip_error \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n", "\u001b[0m -----VARIABLE-----: precipitation\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'lon', 'lat')\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 1440, 480)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: time time\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 int32\u001b[0m\n", + "\u001b[0m dimensions: ('time',) ('time',)\u001b[0m\n", "\u001b[0m shape: (1,) 
(1,)\u001b[0m\n", "\u001b[0m -----VARIABLE-----: time_bounds \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'nv') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 2) \u001b[0m\n", "\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n", - "\u001b[0m Total number of shared items: 1 1\u001b[0m\n", - "\u001b[0m Total number of non-shared items: 7 5\u001b[0m\n", + "\u001b[0m SUMMARY ------------------------------ ------------------------------\u001b[0m\n", + "\u001b[0m Total # of shared variables: 1 1\u001b[0m\n", + "\u001b[0m Total # of non-shared variables: 7 5\u001b[0m\n", + "\u001b[0m Total # of shared groups: 0 0\u001b[0m\n", + "\u001b[0m Total # of non-shared groups: 0 0\u001b[0m\n", + "\u001b[0m Total # of shared attributes: 2 2\u001b[0m\n", + "\u001b[0m Total # of non-shared attributes: 22 16\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", + "Differences were found in these attributes:\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", + "['dimensions', 'dtype', 'shape']\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\n", "Done.\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m" + "\u001b[0m50\u001b[0m\n", + "\u001b[0m\u001b[0m" ] } ], @@ -351,7 +410,12 @@ "cell_type": "code", "execution_count": 6, "id": "1dd4c51a-394c-4569-b8b1-053743e63cb9", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:12.990375Z", + "start_time": "2024-12-13T19:23:12.583132Z" + } + }, "outputs": [ { "name": "stdout", @@ -361,8 +425,6 @@ "\u001b[0m\u001b[37m\u001b[0mFile B: PERSIANN-CDR_v01r01_20230419_c20231030.nc\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "Root-level Dimensions:\u001b[0m\n", - "/usr/local/Caskroom/miniconda/base/envs/ncompare-jupyter-example/lib/python3.12/site-packages/xarray/conventions.py:428: SerializationWarning: variable 'precipitation' has multiple fill values 
{-9999.0, -1.0}, decoding all values to NaN.\n", - " new_vars[k] = decode_cf_variable(\n", "\u001b[0m\u001b[37m\u001b[0m\tAre all items the same? ---> \u001b[31mFalse. (2 items are shared, out of 6 total.)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\t\u001b[31mWhich items are different?\u001b[0m\n", "\u001b[0m File A File B\u001b[0m\n", @@ -376,8 +438,6 @@ "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "Root-level Groups:\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\t\u001b[36mAre all items the same? ---> True. (No items exist.)\u001b[0m\n", - "\u001b[0m\u001b[37m\u001b[0m\u001b[90m\n", - "No variable group selected for comparison. Skipping..\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", "All variables:\u001b[0m\n", "\u001b[0m File A File B\u001b[0m\n", @@ -389,6 +449,7 @@ "\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lat\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat',)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480,)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mbounds: lat_bnds\u001b[0m\n", @@ -399,15 +460,18 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_min: -60.0\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lat_bnds\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat', 'nv')\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480, 2)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lat_bounds \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude', 'nv') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72, 2) \u001b[0m\n", 
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mcomment: latitude values at the north and south bounds of each pixel. \u001b[0m\n", "\u001b[0m -----VARIABLE-----: latitude \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude',) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72,) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31maxis: Y \u001b[0m\n", @@ -418,6 +482,7 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [-90.0, 90.0, ...] \u001b[0m\n", "\u001b[0m -----VARIABLE-----: lon\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon',)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440,)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mbounds: lon_bnds\u001b[0m\n", @@ -428,15 +493,18 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_min: 0.0\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lon_bnds\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon', 'nv')\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440, 2)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n", "\u001b[0m -----VARIABLE-----: lon_bounds \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude', 'nv') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144, 2) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mcomment: longitude values at the west and 
east bounds of each pixel. \u001b[0m\n", "\u001b[0m -----VARIABLE-----: longitude \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude',) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144,) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31maxis: X \u001b[0m\n", @@ -447,6 +515,7 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [0.0, 360.0, ...] \u001b[0m\n", "\u001b[0m -----VARIABLE-----: precip \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mcell_methods: area: mean time: mean \u001b[0m\n", @@ -458,6 +527,7 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [0.0, 100.0, ...] \u001b[0m\n", "\u001b[0m -----VARIABLE-----: precip_error \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mcoordinates: time latitude longitude \u001b[0m\n", @@ -467,6 +537,7 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [0.0, 100.0, ...] 
\u001b[0m\n", "\u001b[0m -----VARIABLE-----: precipitation\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'lon', 'lat')\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 1440, 480)\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: [1, 1440, 480]\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31m_FillValue: -1.0\u001b[0m\n", @@ -479,6 +550,7 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_min: 0.0\u001b[0m\n", "\u001b[0m -----VARIABLE-----: time time\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 int32\u001b[0m\n", + "\u001b[0m dimensions: ('time',) ('time',)\u001b[0m\n", "\u001b[0m shape: (1,) (1,)\u001b[0m\n", "\u001b[0m chunksize: contiguous contiguous\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31maxis: T \u001b[0m\n", @@ -489,15 +561,26 @@ "\u001b[0m\u001b[37m\u001b[0m \u001b[31munits: days since 1970-01-01 00:00:00 0:00 days since 1979-01-01 0:0:0\u001b[0m\n", "\u001b[0m -----VARIABLE-----: time_bounds \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'nv') \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 2) \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m \u001b[31mcomment: time bounds for each time value \u001b[0m\n", "\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n", - "\u001b[0m Total number of shared items: 1 1\u001b[0m\n", - "\u001b[0m Total number of non-shared items: 7 5\u001b[0m\n", + "\u001b[0m SUMMARY ------------------------------ ------------------------------\u001b[0m\n", + "\u001b[0m Total # of shared variables: 1 1\u001b[0m\n", + "\u001b[0m Total # of non-shared variables: 7 5\u001b[0m\n", + "\u001b[0m Total # of shared groups: 0 0\u001b[0m\n", + "\u001b[0m Total # of 
non-shared groups: 0 0\u001b[0m\n", + "\u001b[0m Total # of shared attributes: 5 5\u001b[0m\n", + "\u001b[0m Total # of non-shared attributes: 60 42\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", + "Differences were found in these attributes:\u001b[0m\n", + "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n", + "['_FillValue', 'axis', 'bounds', 'calendar', 'cell_method', 'cell_methods', 'chunksize', 'comment', 'coordinates', 'dimensions', 'dtype', 'long_name', 'missing_value', 'shape', 'standard_name', 'units', 'valid_max', 'valid_min', 'valid_range']\u001b[0m\n", "\u001b[0m\u001b[37m\u001b[0m\n", "Done.\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m" + "\u001b[0m114\u001b[0m\n", + "\u001b[0m\u001b[0m" ] } ], @@ -505,6 +588,235 @@ "! ncompare --show-attributes --show-chunks --column-widths 33 30 30 {file_names[0]} {file_names[2]}" ] }, + { + "cell_type": "markdown", + "id": "7e1344892c25806a", + "metadata": {}, + "source": [ + "# Python Package Usage Example\n", + "----" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "525f98b5cbb923", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:13.326162Z", + "start_time": "2024-12-13T19:23:12.996001Z" + } + }, + "outputs": [], + "source": [ + "from ncompare import compare" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f3363c6630447104", + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-13T19:23:13.428938Z", + "start_time": "2024-12-13T19:23:13.330402Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File A: gpcp_v02r03_monthly_d202301_c20230411.nc\n", + "File B: PERSIANN-CDR_v01r01_20230419_c20231030.nc\n", + "\n", + "Root-level Dimensions:\n", + "\tAre all items the same? ---> False. 
(2 items are shared, out of 6 total.)\n", + "\tWhich items are different?\n", + " File A File B\n", + " #00 ------------------------------ ------------------('lat', 480)\n", + " #01 --------------('latitude', 72) ------------------------------\n", + " #02 ------------------------------ -----------------('lon', 1440)\n", + " #03 ------------('longitude', 144) ------------------------------\n", + "\n", + "Root-level Groups:\n", + "\tAre all items the same? ---> True. (No items exist.)\n", + "\n", + "All variables:\n", + " File A File B\n", + " All Variables \n", + " - ------------------------------ ------------------------------\n", + " \n", + " GROUP #00 -----------------------------/ -----------------------------/\n", + " num variables in group: 8 6\n", + " - ------------------------------ ------------------------------\n", + " -----VARIABLE-----: lat\n", + " dtype: float32\n", + " dimensions: ('lat',)\n", + " shape: (480,)\n", + " chunksize: contiguous\n", + " bounds: lat_bnds\n", + " long_name: latitude\n", + " standard_name: latitude\n", + " units: degrees_north\n", + " valid_max: 60.0\n", + " valid_min: -60.0\n", + " -----VARIABLE-----: lat_bnds\n", + " dtype: float32\n", + " dimensions: ('lat', 'nv')\n", + " shape: (480, 2)\n", + " chunksize: contiguous\n", + " -----VARIABLE-----: lat_bounds \n", + " dtype: float32 \n", + " dimensions: ('latitude', 'nv') \n", + " shape: (72, 2) \n", + " chunksize: contiguous \n", + " comment: latitude values at the north and south bounds of each pixel. \n", + " -----VARIABLE-----: latitude \n", + " dtype: float32 \n", + " dimensions: ('latitude',) \n", + " shape: (72,) \n", + " chunksize: contiguous \n", + " axis: Y \n", + " bounds: lat_bounds \n", + " long_name: Latitude \n", + " standard_name: latitude \n", + " units: degrees_north \n", + " valid_range: [-90.0, 90.0, ...] 
\n", + " -----VARIABLE-----: lon\n", + " dtype: float32\n", + " dimensions: ('lon',)\n", + " shape: (1440,)\n", + " chunksize: contiguous\n", + " bounds: lon_bnds\n", + " long_name: longitude\n", + " standard_name: longitude\n", + " units: degrees_east\n", + " valid_max: 360.0\n", + " valid_min: 0.0\n", + " -----VARIABLE-----: lon_bnds\n", + " dtype: float32\n", + " dimensions: ('lon', 'nv')\n", + " shape: (1440, 2)\n", + " chunksize: contiguous\n", + " -----VARIABLE-----: lon_bounds \n", + " dtype: float32 \n", + " dimensions: ('longitude', 'nv') \n", + " shape: (144, 2) \n", + " chunksize: contiguous \n", + " comment: longitude values at the west and east bounds of each pixel. \n", + " -----VARIABLE-----: longitude \n", + " dtype: float32 \n", + " dimensions: ('longitude',) \n", + " shape: (144,) \n", + " chunksize: contiguous \n", + " axis: X \n", + " bounds: lon_bounds \n", + " long_name: Longitude \n", + " standard_name: longitude \n", + " units: degrees_east \n", + " valid_range: [0.0, 360.0, ...] \n", + " -----VARIABLE-----: precip \n", + " dtype: float32 \n", + " dimensions: ('time', 'latitude', 'longitude') \n", + " shape: (1, 72, 144) \n", + " chunksize: contiguous \n", + " cell_methods: area: mean time: mean \n", + " coordinates: time latitude longitude \n", + " long_name: NOAA Climate Data Record (CDR) of GPCP Monthly Satellite-Gauge Combined Precipitation \n", + " missing_value: -9999.0 \n", + " standard_name: precipitation amount \n", + " units: mm/day \n", + " valid_range: [0.0, 100.0, ...] \n", + " -----VARIABLE-----: precip_error \n", + " dtype: float32 \n", + " dimensions: ('time', 'latitude', 'longitude') \n", + " shape: (1, 72, 144) \n", + " chunksize: contiguous \n", + " coordinates: time latitude longitude \n", + " long_name: NOAA CDR of GPCP Satellite-Gauge Combined Precipitation Error \n", + " missing_value: -9999.0 \n", + " units: mm/day \n", + " valid_range: [0.0, 100.0, ...] 
\n", + " -----VARIABLE-----: precipitation\n", + " dtype: float32\n", + " dimensions: ('time', 'lon', 'lat')\n", + " shape: (1, 1440, 480)\n", + " chunksize: [1, 1440, 480]\n", + " _FillValue: -1.0\n", + " cell_method: sum\n", + " long_name: NOAA Climate Data Record of PERSIANN-CDR daily precipitation\n", + " missing_value: -9999.0\n", + " standard_name: precipitation_amount\n", + " units: mm\n", + " valid_max: 999999.0\n", + " valid_min: 0.0\n", + " -----VARIABLE-----: time time\n", + " dtype: float32 int32\n", + " axis: T \n", + " bounds: time_bounds \n", + " calendar: Gregorian \n", + " units: days since 1970-01-01 00:00:00 0:00 days since 1979-01-01 0:0:0\n", + " -----VARIABLE-----: time_bounds \n", + " dtype: float32 \n", + " dimensions: ('time', 'nv') \n", + " shape: (1, 2) \n", + " chunksize: contiguous \n", + " comment: time bounds for each time value \n", + " - ------------------------------ ------------------------------\n", + " SUMMARY ------------------------------ ------------------------------\n", + " Total # of shared variables: 1 1\n", + " Total # of non-shared variables: 7 5\n", + " Total # of shared groups: 0 0\n", + " Total # of non-shared groups: 0 0\n", + " Total # of shared attributes: 5 5\n", + " Total # of non-shared attributes: 60 42\n", + "\n", + "Differences were found in these attributes:\n", + "\n", + "['_FillValue', 'axis', 'bounds', 'calendar', 'cell_method', 'cell_methods', 'chunksize', 'comment', 'coordinates', 'dimensions', 'dtype', 'long_name', 'missing_value', 'shape', 'standard_name', 'units', 'valid_max', 'valid_min', 'valid_range']\n", + "\n", + "Done.\n" + ] + } + ], + "source": [ + "total_number_of_differences = compare(\n", + " file_names[0],\n", + " file_names[2],\n", + " only_diffs=True,\n", + " show_attributes=True,\n", + " show_chunks=True,\n", + " column_widths=[33, 30, 30],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b8b3f36e-c4ed-4392-81a8-fedcbd6fa3c8", + "metadata": {}, + "source": [ + "The output of 
`ncompare` is the total number of differences (across _variables_, _groups_, and _attributes_):" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d93e1ef4-4bf9-4e48-b166-d42ca2ff42e9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "114\n" + ] + } + ], + "source": [ + "print(total_number_of_differences)" + ] + }, { "cell_type": "markdown", "id": "dccb326d-3b47-4d0f-b96d-93577d3e7c54", @@ -516,9 +828,9 @@ ], "metadata": { "kernelspec": { - "display_name": "ncompare-jupyter-example", + "display_name": "temp_for_ncompare_test", "language": "python", - "name": "ncompare-jupyter-example" + "name": "temp_for_ncompare_test" }, "language_info": { "codemirror_mode": { @@ -530,7 +842,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.0" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/ncompare/__init__.py b/ncompare/__init__.py index 0fc1ce4..e2def66 100644 --- a/ncompare/__init__.py +++ b/ncompare/__init__.py @@ -24,3 +24,15 @@ # See the License for the specific language governing permissions and limitations under the License. 
"""Main code for comparing NetCDF files.""" + +from importlib.metadata import version + +from .core import ( + compare, +) + +__all__ = [ + "compare", +] + +__version__ = version("ncompare") diff --git a/ncompare/console.py b/ncompare/console.py index e530043..8aad37a 100755 --- a/ncompare/console.py +++ b/ncompare/console.py @@ -43,16 +43,14 @@ def _cli(args: Optional[Sequence[str]]) -> argparse.Namespace: Parameters ---------- - args : None or list[str] - if None, then argparse will use sys.argv[1:] + args + if None, then argparse will use `sys.argv[1:]` """ parser = argparse.ArgumentParser( description="Compare the variables contained within two different NetCDF datasets" ) parser.add_argument("nc_a", help="First NetCDF file") parser.add_argument("nc_b", help="Second NetCDF file") - parser.add_argument("-v", "--comparison_var_name", help="Comparison variable name") - parser.add_argument("-g", "--comparison_var_group", help="Comparison variable group") parser.add_argument( "--only-diffs", action="store_true", @@ -110,10 +108,11 @@ def main() -> None: # pragma: no cover delattr(args, "version") try: - compare(**vars(args)) + total_diff_count = compare(**vars(args)) except Exception: # pylint: disable=broad-exception-caught print(traceback.format_exc()) sys.exit(1) + print(total_diff_count) sys.exit(0) # a clean, no-issue, exit diff --git a/ncompare/core.py b/ncompare/core.py index 66be6a5..9d6ca0b 100644 --- a/ncompare/core.py +++ b/ncompare/core.py @@ -29,23 +29,23 @@ """Compare the structure of two NetCDF files.""" -import random -import traceback +import warnings from collections import namedtuple from collections.abc import Iterable, Iterator from pathlib import Path -from typing import Optional, Union +from typing import Optional, TypedDict, Union import netCDF4 -import numpy as np import xarray as xr -from colorama import Fore, Style +from colorama import Fore -from ncompare.printing import Outputter +from ncompare.printing import Outputter, 
SummaryDifferenceKeys from ncompare.sequence_operations import common_elements, count_diffs from ncompare.utils import ensure_valid_path_exists, ensure_valid_path_with_suffix -VarProperties = namedtuple("VarProperties", "varname, variable, dtype, shape, chunking, attributes") +VarProperties = namedtuple( + "VarProperties", "varname, variable, dtype, dimensions, shape, chunking, attributes" +) GroupPair = namedtuple( "GroupPair", @@ -54,11 +54,17 @@ ) +class SummaryDifferencesDict(TypedDict): + shared: int + left: int + right: int + both: int + difference_types: set + + def compare( nc_a: Union[str, Path], nc_b: Union[str, Path], - comparison_var_group: Optional[str] = None, - comparison_var_name: Optional[str] = None, only_diffs: bool = False, no_color: bool = False, show_chunks: bool = False, @@ -67,39 +73,36 @@ def compare( file_csv: Union[str, Path] = "", file_xlsx: Union[str, Path] = "", column_widths: Optional[tuple[Union[int, str], Union[int, str], Union[int, str]]] = None, -) -> None: - """Compare the variables contained within two different NetCDF datasets. +) -> int: + """Compare the variables contained within two different netCDF datasets. 
Parameters ---------- - nc_a : str - filepath to NetCDF4 - nc_b : str - filepath to NetCDF4 - comparison_var_group : str, optional - The name of a group which contains a desired comparison variable - comparison_var_name : str, optional - The name of a variable for which we want to compare values - only_diffs : bool, optional + nc_a + filepath to the first netCDF + nc_b + filepath to the second netCDF + only_diffs Whether to show only the variables/attributes that are different between the two files - no_color : bool, default False + no_color Turns off the use of ANSI escape character sequences for producing colored terminal text - show_chunks : bool, default False + show_chunks Whether to include data chunk sizes in the displayed comparison of variables - show_attributes : bool, default False + show_attributes Whether to include variable attributes in the displayed comparison of variables - file_text : str + file_text filepath destination to save captured text output as a TXT file. - file_csv : str + file_csv filepath destination to save comparison output as comma-separated values (CSV). - file_xlsx : str + file_xlsx filepath destination to save comparison output as an Excel workbook. - column_widths : tuple[int | str, int | str, int | str], optional + column_widths the width in number of characters for each column of the comparison table. Returns ------- - None + int + total number of differences found (across variables, groups, and attributes) """ # Check the validity of paths. nc_a = ensure_valid_path_exists(nc_a) @@ -123,12 +126,10 @@ def compare( out.print(f"File B: {nc_b}") # Start the comparison process. 
- run_through_comparisons( + total_diff_count = run_through_comparisons( out, nc_a, nc_b, - comparison_var_group=comparison_var_group, - comparison_var_name=comparison_var_name, show_chunks=show_chunks, show_attributes=show_attributes, ) @@ -141,27 +142,35 @@ def compare( out.print("\nDone.", colors=False) + return total_diff_count + def run_through_comparisons( out: Outputter, nc_a: Union[str, Path], nc_b: Union[str, Path], - comparison_var_group: Optional[str], - comparison_var_name: Optional[str], show_chunks: bool, show_attributes: bool, -) -> None: - """Execute a series of comparisons between two NetCDF files. +) -> int: + """Execute a series of comparisons between two netCDF files. Parameters ---------- out + instance of Outputter nc_a + path to the first netCDF file nc_b - comparison_var_group - comparison_var_name + path to the second netCDF file show_chunks + whether to include data chunk sizes in the displayed comparison of variables show_attributes + whether to include variable attributes in the displayed comparison of variables + + Returns + ------- + int + total number of differences found (across variables, groups, and attributes) """ # Show the dimensions of each file and evaluate differences. out.print(Fore.LIGHTBLUE_EX + "\nRoot-level Dimensions:", add_to_history=True) @@ -175,90 +184,12 @@ def run_through_comparisons( list_b = _get_groups(nc_b) _, _, _ = out.lists_diff(list_a, list_b) - if comparison_var_group: - # Show the variables within the selected group. - out.print( - Fore.LIGHTBLUE_EX + f"\nVariables within specified group <{comparison_var_group}>:", - add_to_history=True, - ) - vlist_a = _get_vars(nc_a, comparison_var_group) - vlist_b = _get_vars(nc_b, comparison_var_group) - _, _, _ = out.lists_diff(vlist_a, vlist_b) - - # TODO: Remove comparison variable/val? - if comparison_var_name: - try: - # Print the first part of the values array for the selected variable. 
- out.print( - Fore.LIGHTBLUE_EX - + f"\nSample values within specified variable <{comparison_var_name}>:" - ) - _print_sample_values(out, nc_a, comparison_var_group, comparison_var_name) - _print_sample_values(out, nc_b, comparison_var_group, comparison_var_name) - # compare_sample_values(nc_a, nc_b, groupname=comparison_var_group, varname=comparison_var_name) - - out.print( - Fore.LIGHTBLUE_EX - + f"\nChecking multiple random values within specified variable <{comparison_var_name}>:" - ) - compare_multiple_random_values( - out, - nc_a, - nc_b, - groupname=comparison_var_group, - varname=comparison_var_name, - ) - - except KeyError: - out.print( - Style.BRIGHT - + Fore.RED - + f"\nError when comparing values for variable <{comparison_var_name}> " - f"in group <{comparison_var_group}>." - ) - out.print(traceback.format_exc()) - out.print("\n") - else: - out.print(Fore.LIGHTBLACK_EX + "\nNo variable selected for comparison. Skipping..") - else: - out.print(Fore.LIGHTBLACK_EX + "\nNo variable group selected for comparison. 
Skipping..") - out.print(Fore.LIGHTBLUE_EX + "\nAll variables:", add_to_history=True) - _, _, _ = compare_two_nc_files( + total_diff_count = compare_two_nc_files( out, nc_a, nc_b, show_chunks=show_chunks, show_attributes=show_attributes ) - -def compare_multiple_random_values( - out: Outputter, - nc_a: Union[str, Path], - nc_b: Union[str, Path], - groupname: str, - varname: str, - num_comparisons: int = 100, -): - """Iterate through N random samples, and evaluate whether the differences exceed a threshold.""" - # Open a variable from each NetCDF - nc_var_a = xr.open_dataset(nc_a, backend_kwargs={"group": groupname}).variables[varname] - nc_var_b = xr.open_dataset(nc_b, backend_kwargs={"group": groupname}).variables[varname] - - num_mismatches = 0 - for _ in range(num_comparisons): - match_result = _match_random_value(out, nc_var_a, nc_var_b) - if match_result is True: - out.print(".", colors=False, end="") - elif match_result is None: - out.print("n", colors=False, end="") - num_mismatches += 1 - else: - out.print("x", colors=False, end="") - num_mismatches += 1 - - if num_mismatches > 0: - out.print(Fore.RED + f" {num_mismatches} mismatches, out of {num_comparisons} samples.") - else: - out.print(Fore.CYAN + " No mismatches.") - out.print("Done.", colors=False) + return total_diff_count def walk_common_groups_tree( @@ -271,10 +202,14 @@ def walk_common_groups_tree( Parameters ---------- - top_a_name : str - top_a : netCDF4.Dataset or netCDF4.Group - top_b_name : str - top_b : netCDF4.Dataset or netCDF4.Group + top_a_name + name of the first group or dataset + top_a + the first group or dataset + top_b_name + name of the second group or dataset + top_b + the second group or dataset Yields ------ @@ -325,11 +260,50 @@ def compare_two_nc_files( nc_two: Union[str, Path], show_chunks: bool = False, show_attributes: bool = False, -) -> tuple[int, int, int]: - """Go through all groups and all variables, and show them side by side - whether they align and where they 
don't.""" - out.side_by_side(" ", "File A", "File B", force_display_even_if_same=True) +) -> int: + """Go through all groups and all variables, and show them side by side, + highlighting whether they align and where they don't. - num_var_diffs = {"left": 0, "right": 0, "both": 0} + Parameters + ---------- + out + instance of Outputter + nc_one + path to the first dataset + nc_two + path to the second dataset + show_chunks + whether to include chunks alongside variables + show_attributes + whether to include variable attributes + + Returns + ------- + int + total number of differences found (across variables, groups, and attributes) + """ + out.side_by_side(" ", "File A", "File B", force_display_even_if_same=True) + num_group_diffs: SummaryDifferencesDict = { + "shared": 0, + "left": 0, + "right": 0, + "both": 0, + "difference_types": set(), + } + num_var_diffs: SummaryDifferencesDict = { + "shared": 0, + "left": 0, + "right": 0, + "both": 0, + "difference_types": set(), + } + num_attribute_diffs: SummaryDifferencesDict = { + "shared": 0, + "left": 0, + "right": 0, + "both": 0, + "difference_types": set(), + } with netCDF4.Dataset(nc_one) as nc_a, netCDF4.Dataset(nc_two) as nc_b: out.side_by_side( "All Variables", " ", " ", dash_line=False, force_display_even_if_same=True @@ -345,12 +319,20 @@ def compare_two_nc_files( "/", group_counter, num_var_diffs, + num_attribute_diffs, show_attributes, show_chunks, ) group_counter += 1 for group_pair in walk_common_groups_tree("", nc_a, "", nc_b): + if group_pair.group_a_name == "": + num_group_diffs["right"] += 1 + elif group_pair.group_b_name == "": + num_group_diffs["left"] += 1 + else: + num_group_diffs["shared"] += 1 + _print_group_details_side_by_side( out, group_pair.group_a, @@ -359,45 +341,74 @@ def compare_two_nc_files( group_pair.group_b_name, group_counter, num_var_diffs, + num_attribute_diffs, show_attributes, show_chunks, ) group_counter += 1 out.side_by_side("-", "-", "-", dash_line=True, 
force_display_even_if_same=True) + out.side_by_side("SUMMARY", "-", "-", dash_line=True, force_display_even_if_same=True) + + _print_summary_count_comparison_side_by_side(out, "variable", num_var_diffs) + _print_summary_count_comparison_side_by_side(out, "group", num_group_diffs) + _print_summary_count_comparison_side_by_side(out, "attribute", num_attribute_diffs) + if num_attribute_diffs["difference_types"]: + out.print( + Fore.LIGHTBLUE_EX + "\nDifferences were found in these attributes:", add_to_history=True + ) + out.print( + Fore.LIGHTBLUE_EX + f"\n{sorted(num_attribute_diffs['difference_types'])}", + add_to_history=True, + ) + + # Return the total number of differences; thus, zero means no differences were found. + total_diff_count = sum( + [x["left"] + x["right"] for x in [num_var_diffs, num_group_diffs, num_attribute_diffs]] + ) + + return total_diff_count + + +def _print_summary_count_comparison_side_by_side( + out: Outputter, + item_type: str, + diff_dictionary: SummaryDifferencesDict, +) -> None: + # Tally up instances where there were non-empty entries on both left and right sides. 
+ diff_dictionary["left"] += diff_dictionary["both"] + diff_dictionary["right"] += diff_dictionary["both"] + out.side_by_side( - "Total number of shared items:", - str(num_var_diffs["both"]), - str(num_var_diffs["both"]), + f"Total # of shared {item_type}s:", + str(diff_dictionary["shared"]), + str(diff_dictionary["shared"]), force_display_even_if_same=True, ) + out.side_by_side( - "Total number of non-shared items:", - str(num_var_diffs["left"]), - str(num_var_diffs["right"]), + f"Total # of non-shared {item_type}s:", + str(diff_dictionary["left"]), + str(diff_dictionary["right"]), force_display_even_if_same=True, ) - return num_var_diffs["left"], num_var_diffs["right"], num_var_diffs["both"] def _print_group_details_side_by_side( - out, + out: Outputter, group_a: Union[netCDF4.Dataset, netCDF4.Group], group_a_name: str, group_b: Union[netCDF4.Dataset, netCDF4.Group], group_b_name: str, group_counter: int, - num_var_diffs: dict, + num_var_diffs: SummaryDifferencesDict, + num_attribute_diffs: SummaryDifferencesDict, show_attributes: bool, show_chunks: bool, ) -> None: + """Align and display group details side by side.""" out.side_by_side( - " ", - " ", - " ", - dash_line=False, - highlight_diff=False, - force_display_even_if_same=True, + " ", " ", " ", dash_line=False, highlight_diff=False, force_display_even_if_same=True ) out.side_by_side( f"GROUP #{group_counter:02}", @@ -425,10 +436,10 @@ def _print_group_details_side_by_side( out.side_by_side("-", "-", "-", dash_line=True, force_display_even_if_same=True) # Count differences between the lists of variables in this group. - left, right, both = count_diffs(vars_a_sorted, vars_b_sorted) + left, right, shared = count_diffs(vars_a_sorted, vars_b_sorted) num_var_diffs["left"] += left num_var_diffs["right"] += right - num_var_diffs["both"] += both + num_var_diffs["shared"] += shared # Go through each variable in the current group. 
for variable_pair in common_elements(vars_a_sorted, vars_b_sorted): @@ -437,47 +448,47 @@ def _print_group_details_side_by_side( out, _var_properties(group_a, variable_pair[1]), _var_properties(group_b, variable_pair[2]), + num_attribute_diffs, show_chunks=show_chunks, show_attributes=show_attributes, ) def _print_var_properties_side_by_side( - out, + out: Outputter, v_a: VarProperties, v_b: VarProperties, + num_attribute_diffs: SummaryDifferencesDict, show_chunks: bool = False, show_attributes: bool = False, -): - # Gather all variable property pairs first, before printing, so we can decide whether to highlight the variable header +) -> None: + """Align and display variable properties side by side.""" + # Gather all variable property pairs first, before printing, + # so we can decide whether to highlight the variable header. pairs_to_check_and_show = [ (v_a.dtype, v_b.dtype), + (v_a.dimensions, v_b.dimensions), (v_a.shape, v_b.shape), ] if show_chunks: pairs_to_check_and_show.append((v_a.chunking, v_b.chunking)) if show_attributes: - for attr_a_key, attr_a, attr_b_key, attr_b in get_and_check_variable_attributes(v_a, v_b): - # Check whether attr_a_key is empty, because it might be if the variable doesn't exist in File A. + for attr_a_key, attr_a, attr_b_key, attr_b in _get_and_check_variable_attributes(v_a, v_b): + # Check whether attr_a_key is empty, + # because it might be if the variable doesn't exist in File A. 
pairs_to_check_and_show.append((attr_a, attr_b)) # Scale Factor - scale_factor_pair = get_and_check_variable_scale_factor(v_a, v_b) + scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b) if scale_factor_pair: pairs_to_check_and_show.append((scale_factor_pair[0], scale_factor_pair[1])) - # print(f"pairs_to_check_and_show === {pairs_to_check_and_show}") there_is_a_difference = False for pair in pairs_to_check_and_show: if pair[0] != pair[1]: there_is_a_difference = True break - # Variable name - # header_color = None - # if there_is_a_difference: - # header_color = Fore.RED - - # If all attributes are the same, and keep-only-diffs is set -> DONT print + # If all attributes are the same, and keep-only-diffs is set -> DON'T print # If all attributes are the same, and keep-only-diffs is NOT set -> print # If some attributes are different -> print no matter else if there_is_a_difference or (not out.keep_only_diffs): @@ -489,31 +500,38 @@ def _print_var_properties_side_by_side( force_display_even_if_same=True, ) - # Data type - out.side_by_side("dtype:", v_a.dtype, v_b.dtype, highlight_diff=True) - # Shape - out.side_by_side("shape:", v_a.shape, v_b.shape, highlight_diff=True) + # Go through each attribute, show differences, and add differences to running tally. 
+ def _var_attribute_side_by_side(attribute_name, attribute_a, attribute_b): + diff_condition: SummaryDifferenceKeys = out.side_by_side( + f"{attribute_name}:", attribute_a, attribute_b, highlight_diff=True + ) + num_attribute_diffs[diff_condition] += 1 + if diff_condition in ("left", "right", "both"): + num_attribute_diffs["difference_types"].add(attribute_name) + + _var_attribute_side_by_side("dtype", v_a.dtype, v_b.dtype) + _var_attribute_side_by_side("dimensions", v_a.dimensions, v_b.dimensions) + _var_attribute_side_by_side("shape", v_a.shape, v_b.shape) # Chunking if show_chunks: - out.side_by_side("chunksize:", v_a.chunking, v_b.chunking, highlight_diff=True) - # Attributes - if show_attributes: - for attr_a_key, attr_a, attr_b_key, attr_b in get_and_check_variable_attributes(v_a, v_b): - # Check whether attr_a_key is empty, because it might be if the variable doesn't exist in File A. - out.side_by_side( - f"{attr_a_key if attr_a_key else attr_b_key}:", - attr_a, - attr_b, - highlight_diff=True, - ) - + _var_attribute_side_by_side("chunksize", v_a.chunking, v_b.chunking) # Scale Factor - scale_factor_pair = get_and_check_variable_scale_factor(v_a, v_b) + scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b) if scale_factor_pair: - out.side_by_side("sf:", scale_factor_pair[0], scale_factor_pair[1], highlight_diff=True) + _var_attribute_side_by_side("scale_factor", scale_factor_pair[0], scale_factor_pair[1]) + # Other attributes + if show_attributes: + for attr_a_key, attr_a, attr_b_key, attr_b in _get_and_check_variable_attributes(v_a, v_b): + # Check whether attr_a_key is empty, + # because it might be if the variable doesn't exist in File A. 
+ attribute_key = attr_a_key if attr_a_key else attr_b_key + _var_attribute_side_by_side(attribute_key, attr_a, attr_b) -def get_and_check_variable_scale_factor(v_a, v_b) -> Union[None, tuple[str, str]]: +def _get_and_check_variable_scale_factor( + v_a: VarProperties, v_b: VarProperties +) -> Union[None, tuple[str, str]]: + """Get a string representation of the scale factor for two variables.""" if getattr(v_a.variable, "scale_factor", None): sf_a = v_a.variable.scale_factor else: @@ -528,7 +546,10 @@ def get_and_check_variable_scale_factor(v_a, v_b) -> Union[None, tuple[str, str] return None -def get_and_check_variable_attributes(v_a, v_b): +def _get_and_check_variable_attributes( + v_a: VarProperties, v_b: VarProperties +) -> Iterator[tuple[str, str, str, str]]: + """Go through and yield each attribute pair for two variables.""" # Get the name of attributes if they exist attrs_a_names = [] if v_a.attributes: @@ -548,91 +569,45 @@ def _var_properties(group: Union[netCDF4.Dataset, netCDF4.Group], varname: str) Parameters ---------- - group : `netCDF4.Dataset` or netCDF4.Group object - varname : str + group + a dataset or group of variables + varname + the name of the variable Returns ------- - netCDF4.Variable - str - dtype of variable values - tuple - shape of variable - tuple - chunking - dict - any other attributes for this variable + VarProperties """ if varname: the_variable = group.variables[varname] v_dtype = str(the_variable.dtype) + v_dimensions = str(the_variable.dimensions) v_shape = str(the_variable.shape).strip() v_chunking = str(the_variable.chunking()).strip() - v_attributes = {name: getattr(the_variable, name) for name in the_variable.ncattrs()} + + v_attributes = {} + for name in the_variable.ncattrs(): + try: + v_attributes[name] = the_variable.getncattr(name) + except KeyError as key_err: + # Added this check because of "unsupported datatype" error that prevented + # fully running comparisons on S5P_OFFL_L1B_IR_UVN collections. 
+ v_attributes[name] = f"netCDF error: {str(key_err)}" else: the_variable = None v_dtype = "" + v_dimensions = "" v_shape = "" v_chunking = "" v_attributes = None - return VarProperties(varname, the_variable, v_dtype, v_shape, v_chunking, v_attributes) - - -def _match_random_value( - out: Outputter, nc_var_a: xr.Variable, nc_var_b: xr.Variable, thresh: float = 1e-6 -) -> Union[bool, None]: - """Check whether a randomly selected data point matches between two variables. - - Returns - ------- - None or bool - None if data point is null for one and only one of the variables - True if values match - False if the difference exceeds the given threshold - """ - # Get a random indexer - rand_index = [] - for dim_length in nc_var_a.shape: - rand_index.append(random.randint(0, dim_length - 1)) - rand_index_tuple = tuple(rand_index) - - # Get the values from each variable - value_a = nc_var_a.values[rand_index_tuple] - value_b = nc_var_b.values[rand_index_tuple] - - # Check whether null - if np.isnan(value_a) and np.isnan(value_b): - return True - elif np.isnan(value_a) or np.isnan(value_b): - return None - - # Evaluate difference between values - diff = value_b - value_a - if abs(diff) > thresh: - out.print() - out.print(Fore.RED + f"Difference exceeded threshold (diff == {diff}") - out.print(f"var shape: {nc_var_a.shape}", colors=False) - out.print(f"indices: {rand_index_tuple}", colors=False) - out.print(f"value a: {value_a}", colors=False) - out.print(f"value b: {value_b}", colors=False, end="\n\n") - return False - - return True - - -def _print_sample_values(out: Outputter, nc_filepath, groupname: str, varname: str) -> None: - comparison_variable = xr.open_dataset(nc_filepath, backend_kwargs={"group": groupname})[varname] - vector_of_values = comparison_variable.values.flatten() - n_values = len(vector_of_values) - if n_values > 100: - sample_length = 100 - else: - sample_length = n_values - out.print(str(vector_of_values[:sample_length]), colors=False) + return 
VarProperties( + varname, the_variable, v_dtype, v_dimensions, v_shape, v_chunking, v_attributes + ) def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) -> str: + """Get a string representation of the attribute value.""" if attribute_key and (attribute_key in varprops.attributes): attr = varprops.attributes[attribute_key] if isinstance(attr, Iterable) and not isinstance(attr, (str, float)): @@ -647,27 +622,21 @@ def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) -> return "" -def _get_vars(nc_filepath: Union[str, Path], groupname: str) -> list: - try: - grp = xr.open_dataset(nc_filepath, backend_kwargs={"group": groupname}) - except OSError as err: - print(f"\nError occurred when attempting to open group within <{nc_filepath}>.\n") - raise err - grp_varlist = sorted(list(grp.variables.keys())) # type:ignore[type-var] - - return grp_varlist - - def _get_groups(nc_filepath: Union[str, Path]) -> list: + """Get a list of groups from a netCDF.""" with netCDF4.Dataset(nc_filepath) as dataset: groups_list = list(dataset.groups.keys()) return groups_list def _get_dims(nc_filepath: Union[str, Path]) -> list: + """Get a list of dimensions from a netCDF.""" + def __get_dim_list(decode_times=True): - with xr.open_dataset(nc_filepath, decode_times=decode_times) as dataset: - return list(dataset.sizes.items()) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with xr.open_dataset(nc_filepath, decode_times=decode_times) as dataset: + return list(dataset.sizes.items()) try: dims_list = __get_dim_list() diff --git a/ncompare/printing.py b/ncompare/printing.py index 339c513..5224b30 100644 --- a/ncompare/printing.py +++ b/ncompare/printing.py @@ -29,9 +29,9 @@ import csv import re import warnings -from collections.abc import Iterable +from collections.abc import Iterable, Iterator from pathlib import Path -from typing import Optional, TextIO, Union +from typing import Literal, Optional, TextIO, Union import 
colorama import openpyxl @@ -41,6 +41,8 @@ from ncompare.sequence_operations import common_elements, count_diffs +SummaryDifferenceKeys = Literal["shared", "left", "right", "both"] + # Set up regex remover of ANSI color escape sequences # From ansi_escape = re.compile( @@ -77,6 +79,15 @@ def __init__( Parameters ---------- keep_print_history + whether to keep printing history or not - used for file output + keep_only_diffs + whether to keep and print only comparisons that show differences + no_color + whether to turn off colorized output + text_file + optional path to a text file to write output to + column_widths + optional tuple of column widths to use for printing """ # Parse the print history option. self._keep_print_history = keep_print_history @@ -140,10 +151,10 @@ def print( Parameters ---------- - string : str - colors : bool + string + colors If False, ANSI colors will be turned off. - add_to_history : bool + add_to_history print_args Additional keyword arguments that are passed to the standard Python print() function. """ @@ -211,7 +222,7 @@ def side_by_side( highlight_diff=False, force_display_even_if_same=False, force_color=None, - ) -> None: + ) -> SummaryDifferenceKeys: """Print three strings on one line, with customized formatting and an optional marker in the fourth column. Parameters @@ -219,8 +230,18 @@ def side_by_side( str_a str_b str_c - dash_line : bool, default False - highlight_diff : bool, default False + dash_line + highlight_diff + force_display_even_if_same + force_color + + Returns + ------- + str + "shared" if str_b == str_c, + "left" if only str_c is empty, + "right" if str_b is empty, and + "both" if they are different from each other. """ are_different = str_b != str_c if ( @@ -228,7 +249,7 @@ def side_by_side( and (are_different is False) and self.keep_only_diffs ): - return None + return "shared" # there are two non-empty strings, and they are equal to each other. 
# If the 'b' and 'c' strings are different (or force_color is set), # then change the font of 'a' to the color red. @@ -265,6 +286,15 @@ def side_by_side( self._add_to_history(str_a, str_b, str_c, str_marker) + if not are_different: + return "shared" + elif str_b and (not str_c): + return "left" # there is only a non-empty string on the left side. + elif str_c and (not str_b): + return "right" # there is only a non-empty string on the right side. + else: + return "both" # there are non-empty strings on both sides, and they are not equal. + def side_by_side_list_diff(self, list_a: list, list_b: list, counter_prefix="") -> None: """Print the items from two lists vertically (i.e., side by side), with customized formatting. @@ -289,7 +319,24 @@ def lists_diff( list_b: list, ignore_order: bool = True, ) -> tuple[int, int, int]: - """Compare two lists and state whether there are differences.""" + """Compare two lists and state whether there are differences. + + Parameters + ---------- + list_a + list_b + ignore_order + + Returns + ------- + tuple + int + number of entries only present in the first (left) list + int + number of entries only present in the second (right) list + int + number of entries shared among the first (left) and second (right) list + """ set_a, set_b = set(list_a), set(list_b) s_union = set_a.union(set_b) @@ -311,11 +358,11 @@ def lists_diff( self.print(msg + " (No items exist.)", add_to_history=True) return 0, 0, len(list_a) - # If contents are not the same, continue... - left, right, both = count_diffs(list_a, list_b) + # If contents are different, continue... + left, right, shared = count_diffs(list_a, list_b) self.print( "\t" + "Are all items the same? ---> " + Fore.RED + f"{str(contents_are_same)}." 
- f" ({_item_is_or_are(both)} shared, out of {len(s_union)} total.)", + f" ({_item_is_or_are(shared)} shared, out of {len(s_union)} total.)", add_to_history=True, ) @@ -328,9 +375,9 @@ def lists_diff( self.side_by_side_list_diff(list_a, list_b) self.side_by_side("Number of non-shared items:", str(left), str(right)) - return left, right, both + return left, right, shared - def write_history_to_csv(self, filename: Union[str, Path] = "test.csv"): + def write_history_to_csv(self, filename: Union[str, Path] = "test.csv") -> None: """Save the line history that's been stored to a CSV file.""" headers = ["Info", "File A", "File B", "Other marks"] with open(filename, "w", encoding="utf-8") as target: @@ -338,8 +385,8 @@ def write_history_to_csv(self, filename: Union[str, Path] = "test.csv"): writer.writerow(headers) writer.writerows(self._line_history) - def write_history_to_excel(self, filename: Union[str, Path] = "test.xlsx"): - """Save the line history that's been stored to a CSV file.""" + def write_history_to_excel(self, filename: Union[str, Path] = "test.xlsx") -> None: + """Save the line history that's been stored to an Excel file.""" workbook = openpyxl.Workbook() sheet = workbook.active @@ -363,14 +410,14 @@ def write_history_to_excel(self, filename: Union[str, Path] = "test.xlsx"): workbook.save(filename) -def _item_is_or_are(count): +def _item_is_or_are(count) -> str: if count == 1: return f"{count} item is" return f"{count} items are" -def _excel_red_cells(data, sheet): +def _excel_red_cells(data, sheet) -> Iterator: """Stylize cells in Excel with a red font.""" for cell in data: cell = Cell(sheet, column="A", row=1, value=cell) @@ -378,7 +425,7 @@ def _excel_red_cells(data, sheet): yield cell -def _excel_bold_underline_cells(data, sheet): +def _excel_bold_underline_cells(data, sheet) -> Iterator: """Stylize cells in Excel with a bold and underlined font.""" for cell in data: cell = Cell(sheet, column="A", row=1, value=cell) diff --git 
a/ncompare/sequence_operations.py b/ncompare/sequence_operations.py index 5baef68..5d2e246 100644 --- a/ncompare/sequence_operations.py +++ b/ncompare/sequence_operations.py @@ -73,7 +73,7 @@ def common_elements( def count_diffs( list_a: Union[list[str], list[int], str], list_b: Union[list[str], list[int], str] ) -> tuple[int, int, int]: - """Count how many elements are either uniquely in one list or the other, or in both. + """Count how many elements are either uniquely in one list or the other, or shared. Note ---- @@ -96,6 +96,6 @@ def count_diffs( # The number of differences is computed. left = len(set_a - set_b) right = len(set_b - set_a) - both = len(set_a.intersection(set_b)) + shared = len(set_a.intersection(set_b)) - return left, right, both + return left, right, shared diff --git a/poetry.lock b/poetry.lock index 55630c1..cfb84e1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1167,13 +1167,13 @@ pygments = ">2.12.0" [[package]] name = "mkdocs-material" -version = "9.5.44" +version = "9.5.47" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.44-py3-none-any.whl", hash = "sha256:47015f9c167d58a5ff5e682da37441fc4d66a1c79334bfc08d774763cacf69ca"}, - {file = "mkdocs_material-9.5.44.tar.gz", hash = "sha256:f3a6c968e524166b3f3ed1fb97d3ed3e0091183b0545cedf7156a2a6804c56c0"}, + {file = "mkdocs_material-9.5.47-py3-none-any.whl", hash = "sha256:53fb9c9624e7865da6ec807d116cd7be24b3cb36ab31b1d1d1a9af58c56009a2"}, + {file = "mkdocs_material-9.5.47.tar.gz", hash = "sha256:fc3b7a8e00ad896660bd3a5cc12ca0cb28bdc2bcbe2a946b5714c23ac91b0ede"}, ] [package.dependencies] @@ -1812,13 +1812,13 @@ extra = ["pygments (>=2.12)"] [[package]] name = "pytest" -version = "8.3.3" +version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.3.3-py3-none-any.whl", hash = 
"sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, - {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, + {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, + {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, ] [package.dependencies] @@ -2339,29 +2339,29 @@ files = [ [[package]] name = "ruff" -version = "0.7.3" +version = "0.8.1" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.7.3-py3-none-linux_armv6l.whl", hash = "sha256:34f2339dc22687ec7e7002792d1f50712bf84a13d5152e75712ac08be565d344"}, - {file = "ruff-0.7.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:fb397332a1879b9764a3455a0bb1087bda876c2db8aca3a3cbb67b3dbce8cda0"}, - {file = "ruff-0.7.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:37d0b619546103274e7f62643d14e1adcbccb242efda4e4bdb9544d7764782e9"}, - {file = "ruff-0.7.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d59f0c3ee4d1a6787614e7135b72e21024875266101142a09a61439cb6e38a5"}, - {file = "ruff-0.7.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44eb93c2499a169d49fafd07bc62ac89b1bc800b197e50ff4633aed212569299"}, - {file = "ruff-0.7.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d0242ce53f3a576c35ee32d907475a8d569944c0407f91d207c8af5be5dae4e"}, - {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6b6224af8b5e09772c2ecb8dc9f3f344c1aa48201c7f07e7315367f6dd90ac29"}, - {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c50f95a82b94421c964fae4c27c0242890a20fe67d203d127e84fbb8013855f5"}, - {file = "ruff-0.7.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:7f3eff9961b5d2644bcf1616c606e93baa2d6b349e8aa8b035f654df252c8c67"}, - {file = "ruff-0.7.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8963cab06d130c4df2fd52c84e9f10d297826d2e8169ae0c798b6221be1d1d2"}, - {file = "ruff-0.7.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:61b46049d6edc0e4317fb14b33bd693245281a3007288b68a3f5b74a22a0746d"}, - {file = "ruff-0.7.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:10ebce7696afe4644e8c1a23b3cf8c0f2193a310c18387c06e583ae9ef284de2"}, - {file = "ruff-0.7.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3f36d56326b3aef8eeee150b700e519880d1aab92f471eefdef656fd57492aa2"}, - {file = "ruff-0.7.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5d024301109a0007b78d57ab0ba190087b43dce852e552734ebf0b0b85e4fb16"}, - {file = "ruff-0.7.3-py3-none-win32.whl", hash = "sha256:4ba81a5f0c5478aa61674c5a2194de8b02652f17addf8dfc40c8937e6e7d79fc"}, - {file = "ruff-0.7.3-py3-none-win_amd64.whl", hash = "sha256:588a9ff2fecf01025ed065fe28809cd5a53b43505f48b69a1ac7707b1b7e4088"}, - {file = "ruff-0.7.3-py3-none-win_arm64.whl", hash = "sha256:1713e2c5545863cdbfe2cbce21f69ffaf37b813bfd1fb3b90dc9a6f1963f5a8c"}, - {file = "ruff-0.7.3.tar.gz", hash = "sha256:e1d1ba2e40b6e71a61b063354d04be669ab0d39c352461f3d789cac68b54a313"}, + {file = "ruff-0.8.1-py3-none-linux_armv6l.whl", hash = "sha256:fae0805bd514066f20309f6742f6ee7904a773eb9e6c17c45d6b1600ca65c9b5"}, + {file = "ruff-0.8.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b8a4f7385c2285c30f34b200ca5511fcc865f17578383db154e098150ce0a087"}, + {file = "ruff-0.8.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cd054486da0c53e41e0086e1730eb77d1f698154f910e0cd9e0d64274979a209"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2029b8c22da147c50ae577e621a5bfbc5d1fed75d86af53643d7a7aee1d23871"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:2666520828dee7dfc7e47ee4ea0d928f40de72056d929a7c5292d95071d881d1"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:333c57013ef8c97a53892aa56042831c372e0bb1785ab7026187b7abd0135ad5"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:288326162804f34088ac007139488dcb43de590a5ccfec3166396530b58fb89d"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b12c39b9448632284561cbf4191aa1b005882acbc81900ffa9f9f471c8ff7e26"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:364e6674450cbac8e998f7b30639040c99d81dfb5bbc6dfad69bc7a8f916b3d1"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b22346f845fec132aa39cd29acb94451d030c10874408dbf776af3aaeb53284c"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b2f2f7a7e7648a2bfe6ead4e0a16745db956da0e3a231ad443d2a66a105c04fa"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:adf314fc458374c25c5c4a4a9270c3e8a6a807b1bec018cfa2813d6546215540"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a885d68342a231b5ba4d30b8c6e1b1ee3a65cf37e3d29b3c74069cdf1ee1e3c9"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d2c16e3508c8cc73e96aa5127d0df8913d2290098f776416a4b157657bee44c5"}, + {file = "ruff-0.8.1-py3-none-win32.whl", hash = "sha256:93335cd7c0eaedb44882d75a7acb7df4b77cd7cd0d2255c93b28791716e81790"}, + {file = "ruff-0.8.1-py3-none-win_amd64.whl", hash = "sha256:2954cdbe8dfd8ab359d4a30cd971b589d335a44d444b6ca2cb3d1da21b75e4b6"}, + {file = "ruff-0.8.1-py3-none-win_arm64.whl", hash = "sha256:55873cc1a473e5ac129d15eccb3c008c096b94809d693fc7053f588b67822737"}, + {file = "ruff-0.8.1.tar.gz", hash = "sha256:3583db9a6450364ed5ca3f3b4225958b24f78178908d5c4bc0f46251ccca898f"}, ] [[package]] @@ -2436,22 +2436,22 @@ files = [ 
[[package]] name = "tornado" -version = "6.4.1" +version = "6.4.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." optional = false python-versions = ">=3.8" files = [ - {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"}, - {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14"}, - {file = "tornado-6.4.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e20b9113cd7293f164dc46fffb13535266e713cdb87bd2d15ddb336e96cfc4"}, - {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ae50a504a740365267b2a8d1a90c9fbc86b780a39170feca9bcc1787ff80842"}, - {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613bf4ddf5c7a95509218b149b555621497a6cc0d46ac341b30bd9ec19eac7f3"}, - {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:25486eb223babe3eed4b8aecbac33b37e3dd6d776bc730ca14e1bf93888b979f"}, - {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:454db8a7ecfcf2ff6042dde58404164d969b6f5d58b926da15e6b23817950fc4"}, - {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a02a08cc7a9314b006f653ce40483b9b3c12cda222d6a46d4ac63bb6c9057698"}, - {file = "tornado-6.4.1-cp38-abi3-win32.whl", hash = "sha256:d9a566c40b89757c9aa8e6f032bcdb8ca8795d7c1a9762910c722b1635c9de4d"}, - {file = "tornado-6.4.1-cp38-abi3-win_amd64.whl", hash = "sha256:b24b8982ed444378d7f21d563f4180a2de31ced9d8d84443907a0a64da2072e7"}, - {file = "tornado-6.4.1.tar.gz", hash = "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9"}, + {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_universal2.whl", hash = 
"sha256:e828cce1123e9e44ae2a50a9de3055497ab1d0aeb440c5ac23064d9e44880da1"}, + {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803"}, + {file = "tornado-6.4.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a017d239bd1bb0919f72af256a970624241f070496635784d9bf0db640d3fec"}, + {file = "tornado-6.4.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c36e62ce8f63409301537222faffcef7dfc5284f27eec227389f2ad11b09d946"}, + {file = "tornado-6.4.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca9eb02196e789c9cb5c3c7c0f04fb447dc2adffd95265b2c7223a8a615ccbf"}, + {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:304463bd0772442ff4d0f5149c6f1c2135a1fae045adf070821c6cdc76980634"}, + {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:c82c46813ba483a385ab2a99caeaedf92585a1f90defb5693351fa7e4ea0bf73"}, + {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:932d195ca9015956fa502c6b56af9eb06106140d844a335590c1ec7f5277d10c"}, + {file = "tornado-6.4.2-cp38-abi3-win32.whl", hash = "sha256:2876cef82e6c5978fde1e0d5b1f919d756968d5b4282418f3146b79b58556482"}, + {file = "tornado-6.4.2-cp38-abi3-win_amd64.whl", hash = "sha256:908b71bf3ff37d81073356a5fadcc660eb10c1476ee6e2725588626ce7e5ca38"}, + {file = "tornado-6.4.2.tar.gz", hash = "sha256:92bad5b4746e9879fd7bf1eb21dce4e3fc5128d71601f80005afa39237ad620b"}, ] [[package]] @@ -2618,4 +2618,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "23343a1e303b445f5cc352380af173128fbd50b66b83105dc4b4eb1dba38aab8" +content-hash = "96eeae493d6ddb99129f57dd7f124315c3312da2d07bdd1dfa37f912b804145c" diff --git a/pyproject.toml b/pyproject.toml index ccc0c32..54b05ce 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ncompare" -version = "1.11.0" +version = "1.12.0rc5" description = "Compare the structure of two NetCDF files at the command line" authors = ["Daniel Kaufman "] readme = "README.md" @@ -32,7 +32,7 @@ openpyxl = ">=3.1.2" [tool.poetry.group.dev.dependencies] pytest = ">=7.4.2,<9.0.0" -ruff = ">=0.5.1,<0.7.4" +ruff = ">=0.5.1,<0.8.2" mypy = ">=1.5.1" pytest-cov = ">=4.1,<7.0" mkdocs = ">=1.5.3" diff --git a/tests/data/a-b_test_golden_file.csv b/tests/data/a-b_test_golden_file.csv index 749bec1..b3bb60a 100644 --- a/tests/data/a-b_test_golden_file.csv +++ b/tests/data/a-b_test_golden_file.csv @@ -1,103 +1,121 @@ -Info,File A,File B,Other marks -Root-level Dimensions: - Are all items the same? ---> True. -Root-level Groups: - Are all items the same? ---> True. -All variables: - ,File A,File B, -All Variables, , , --,-,-, - , , , -GROUP #00,/,/, -num variables in group:,2,2, --,-,-, ------VARIABLE-----:,conditions,conditions, -dtype:,int32,int32, -shape:,"(2,)","(2,)", -chunksize:,contiguous,contiguous, ------VARIABLE-----:,time,time, -dtype:,float64,float64, -shape:,"(5,)","(5,)", -chunksize:,[512],[512], -calendar:,gregorian,gregorian, -coordinates:,time,time, -long_name:,Time of observation,Time of observation, -units:,hours since 0001-01-01 00:00:00.0,hours since 0001-01-01 00:00:00.0, - , , , -GROUP #01,/Data,/Data, -num variables in group:,1,1, --,-,-, ------VARIABLE-----:,level,level, -dtype:,int32,int32, -shape:,"(2,)","(2,)", -chunksize:,[1024],[1024], -units:,hPa,hPa, - , , , -GROUP #02,/Position,/Position, -num variables in group:,2,2, --,-,-, ------VARIABLE-----:,lat,lat, -dtype:,float32,float32, -shape:,"(3,)","(2,)",*** -chunksize:,contiguous,contiguous, -units:,degrees north,degrees north, ------VARIABLE-----:,lon,lon, -dtype:,float32,float32, -shape:,"(4,)","(2,)",*** -chunksize:,contiguous,contiguous, -units:,degrees east,degrees east, - , , , -GROUP #03,/Statistics,/Statistics, -num variables in 
group:,1,1, --,-,-, ------VARIABLE-----:,mean_value,, -dtype:,float32,,*** -shape:,"(5,)",,*** -chunksize:,[1024],,*** -coordinates:,time,,*** -long_name:,average value for each time,,*** ------VARIABLE-----:,,std_value, -dtype:,,float32,*** -shape:,,"(5,)",*** -chunksize:,,[1024],*** -coordinates:,,time,*** -long_name:,,standard deviation value for each time,*** - , , , -GROUP #04,/Data/Products,/Data/Products, -num variables in group:,1,1, --,-,-, ------VARIABLE-----:,temp,temp, -dtype:,float32,float32, -shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",*** -chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",*** -long_name:,temperature,temperature, -units:,K,Kelvin,*** - , , , -GROUP #05,/Data/Quality,/Data/Quality, -num variables in group:,1,1, --,-,-, ------VARIABLE-----:,quality_flag,quality_flag, -dtype:,int32,int32, -shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",*** -chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",*** -units:,unitless,unitless, - , , , -GROUP #06,,/Data/Supplemental, -num variables in group:,0,1,*** --,-,-, ------VARIABLE-----:,,supplemental_flag, -dtype:,,int32,*** -shape:,,"(5, 2)",*** -chunksize:,,"[1, 2]",*** -units:,,unitless,*** - , , , -GROUP #07,,/Data/Supplemental/Details, -num variables in group:,0,1,*** --,-,-, ------VARIABLE-----:,,condition_details, -dtype:,,float64,*** -shape:,,"(2,)",*** -chunksize:,,contiguous,*** --,-,-, -Total number of shared items:,7,7, -Total number of non-shared items:,1,3, +Info,File A,File B,Other marks +Root-level Dimensions: + Are all items the same? ---> True. +Root-level Groups: + Are all items the same? ---> True. 
+All variables: + ,File A,File B, +All Variables, , , +-,-,-, + , , , +GROUP #00,/,/, +num variables in group:,2,2, +-,-,-, +-----VARIABLE-----:,conditions,conditions, +dtype:,int32,int32, +dimensions:,"('conditions',)","('conditions',)", +shape:,"(2,)","(2,)", +chunksize:,contiguous,contiguous, +-----VARIABLE-----:,time,time, +dtype:,float64,float64, +dimensions:,"('time',)","('time',)", +shape:,"(5,)","(5,)", +chunksize:,[512],[512], +calendar:,gregorian,gregorian, +coordinates:,time,time, +long_name:,Time of observation,Time of observation, +units:,hours since 0001-01-01 00:00:00.0,hours since 0001-01-01 00:00:00.0, + , , , +GROUP #01,/Data,/Data, +num variables in group:,1,1, +-,-,-, +-----VARIABLE-----:,level,level, +dtype:,int32,int32, +dimensions:,"('level',)","('level',)", +shape:,"(2,)","(2,)", +chunksize:,[1024],[1024], +units:,hPa,hPa, + , , , +GROUP #02,/Position,/Position, +num variables in group:,2,2, +-,-,-, +-----VARIABLE-----:,lat,lat, +dtype:,float32,float32, +dimensions:,"('lat',)","('lat',)", +shape:,"(3,)","(2,)",*** +chunksize:,contiguous,contiguous, +units:,degrees north,degrees north, +-----VARIABLE-----:,lon,lon, +dtype:,float32,float32, +dimensions:,"('lon',)","('lon',)", +shape:,"(4,)","(2,)",*** +chunksize:,contiguous,contiguous, +units:,degrees east,degrees east, + , , , +GROUP #03,/Statistics,/Statistics, +num variables in group:,1,1, +-,-,-, +-----VARIABLE-----:,mean_value,, +dtype:,float32,,*** +dimensions:,"('time',)",,*** +shape:,"(5,)",,*** +chunksize:,[1024],,*** +coordinates:,time,,*** +long_name:,average value for each time,,*** +-----VARIABLE-----:,,std_value, +dtype:,,float32,*** +dimensions:,,"('time',)",*** +shape:,,"(5,)",*** +chunksize:,,[1024],*** +coordinates:,,time,*** +long_name:,,standard deviation value for each time,*** + , , , +GROUP #04,/Data/Products,/Data/Products, +num variables in group:,1,1, +-,-,-, +-----VARIABLE-----:,temp,temp, +dtype:,float32,float32, +dimensions:,"('time', 'level', 'lat', 
'lon')","('time', 'level', 'lat', 'lon')", +shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",*** +chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",*** +long_name:,temperature,temperature, +units:,K,Kelvin,*** + , , , +GROUP #05,/Data/Quality,/Data/Quality, +num variables in group:,1,1, +-,-,-, +-----VARIABLE-----:,quality_flag,quality_flag, +dtype:,int32,int32, +dimensions:,"('time', 'level', 'lat', 'lon')","('time', 'level', 'lat', 'lon')", +shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",*** +chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",*** +units:,unitless,unitless, + , , , +GROUP #06,,/Data/Supplemental, +num variables in group:,0,1,*** +-,-,-, +-----VARIABLE-----:,,supplemental_flag, +dtype:,,int32,*** +dimensions:,,"('time', 'conditions')",*** +shape:,,"(5, 2)",*** +chunksize:,,"[1, 2]",*** +units:,,unitless,*** + , , , +GROUP #07,,/Data/Supplemental/Details, +num variables in group:,0,1,*** +-,-,-, +-----VARIABLE-----:,,condition_details, +dtype:,,float64,*** +dimensions:,,"('conditions',)",*** +shape:,,"(2,)",*** +chunksize:,,contiguous,*** +-,-,-, +SUMMARY,-,-, +Total # of shared variables:,7,7, +Total # of non-shared variables:,1,3, +Total # of shared groups:,5,5, +Total # of non-shared groups:,0,2, +Total # of shared attributes:,31,31, +Total # of non-shared attributes:,13,22, +Differences were found in these attributes: +"['chunksize', 'coordinates', 'dimensions', 'dtype', 'long_name', 'shape', 'units']" diff --git a/tests/data/a-b_test_golden_file.txt b/tests/data/a-b_test_golden_file.txt index 54166ff..a661ee4 100644 --- a/tests/data/a-b_test_golden_file.txt +++ b/tests/data/a-b_test_golden_file.txt @@ -1,5 +1,5 @@ -File A: ncompare/tests/data/test_a.nc -File B: ncompare/tests/data/test_b.nc +File A: /Users/dkaufma3/projects/ncompare_project/2_ExperimentFolder/ncompare/tests/data/test_a.nc +File B: /Users/dkaufma3/projects/ncompare_project/2_ExperimentFolder/ncompare/tests/data/test_b.nc Root-level Dimensions: Are all items the same? ---> True. 
@@ -9,8 +9,6 @@ Root-level Groups: Are all items the same? ---> True. ['Data', 'Position', 'Statistics'] -No variable group selected for comparison. Skipping.. - All variables: File A File B All Variables @@ -21,10 +19,12 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: conditions conditions dtype: int32 int32 + dimensions: ('conditions',) ('conditions',) shape: (2,) (2,) chunksize: contiguous contiguous -----VARIABLE-----: time time dtype: float64 float64 + dimensions: ('time',) ('time',) shape: (5,) (5,) chunksize: [512] [512] calendar: gregorian gregorian @@ -37,6 +37,7 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: level level dtype: int32 int32 + dimensions: ('level',) ('level',) shape: (2,) (2,) chunksize: [1024] [1024] units: hPa hPa @@ -46,11 +47,13 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: lat lat dtype: float32 float32 + dimensions: ('lat',) ('lat',) shape: (3,) (2,) chunksize: contiguous contiguous units: degrees north degrees north -----VARIABLE-----: lon lon dtype: float32 float32 + dimensions: ('lon',) ('lon',) shape: (4,) (2,) chunksize: contiguous contiguous units: degrees east degrees east @@ -60,12 +63,14 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: mean_value dtype: float32 + dimensions: ('time',) shape: (5,) chunksize: [1024] coordinates: time long_name: average value for each time -----VARIABLE-----: std_value dtype: float32 + dimensions: ('time',) shape: (5,) chunksize: [1024] coordinates: time @@ -76,6 +81,7 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: temp temp dtype: float32 float32 + 
dimensions: ('time', 'level', 'lat', 'lon') ('time', 'level', 'lat', 'lon') shape: (5, 2, 3, 4) (5, 2, 2, 2) chunksize: [1, 1, 3, 4] [1, 1, 2, 2] long_name: temperature temperature @@ -86,6 +92,7 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: quality_flag quality_flag dtype: int32 int32 + dimensions: ('time', 'level', 'lat', 'lon') ('time', 'level', 'lat', 'lon') shape: (5, 2, 3, 4) (5, 2, 2, 2) chunksize: [1, 1, 3, 4] [1, 1, 2, 2] units: unitless unitless @@ -95,6 +102,7 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: supplemental_flag dtype: int32 + dimensions: ('time', 'conditions') shape: (5, 2) chunksize: [1, 2] units: unitless @@ -104,10 +112,20 @@ All variables: - ------------------------------------------------ ------------------------------------------------ -----VARIABLE-----: condition_details dtype: float64 + dimensions: ('conditions',) shape: (2,) chunksize: contiguous - ------------------------------------------------ ------------------------------------------------ - Total number of shared items: 7 7 - Total number of non-shared items: 1 3 + SUMMARY ------------------------------------------------ ------------------------------------------------ + Total # of shared variables: 7 7 + Total # of non-shared variables: 1 3 + Total # of shared groups: 5 5 + Total # of non-shared groups: 0 2 + Total # of shared attributes: 31 31 + Total # of non-shared attributes: 13 22 + +Differences were found in these attributes: + +['chunksize', 'coordinates', 'dimensions', 'dtype', 'long_name', 'shape', 'units'] Done. 
diff --git a/tests/data/a-b_test_golden_file.xlsx b/tests/data/a-b_test_golden_file.xlsx index 5b5b49c..a5426d3 100644 Binary files a/tests/data/a-b_test_golden_file.xlsx and b/tests/data/a-b_test_golden_file.xlsx differ diff --git a/tests/test_core.py b/tests/test_core.py index 8d127a6..b8595d2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -31,10 +31,12 @@ from contextlib import nullcontext as does_not_raise -import pytest -import xarray as xr +import netCDF4 as nc -from ncompare.core import _get_vars, _match_random_value, _print_sample_values, compare +from ncompare.core import ( + _var_properties, + compare, +) def compare_ab(a, b): @@ -69,139 +71,15 @@ def test_no_error_compare_2groupsTo1Subgroup( compare_ba(ds_3dims_3vars_4coords_2groups, ds_3dims_3vars_4coords_1subgroup) -def test_matching_random_values( - ds_3dims_2vars_4coords, - ds_4dims_3vars_5coords, - ds_3dims_3vars_4coords_1group, - ds_1dim_1var_1coord, - ds_1dim_1var_allnan_1coord, - outputter_to_console, -): - variable_array_1 = xr.open_dataset(ds_3dims_2vars_4coords).variables["z1"] - variable_array_2 = xr.open_dataset(ds_4dims_3vars_5coords).variables["z1"] - variable_array_3 = xr.open_dataset(ds_1dim_1var_1coord).variables["z1"] - variable_array_allnan = xr.open_dataset(ds_1dim_1var_allnan_1coord).variables["z1"] - - assert ( - _match_random_value( - outputter_to_console, - variable_array_1, - variable_array_1, - ) - is True - ) - assert ( - _match_random_value( - outputter_to_console, - variable_array_1, - variable_array_2, - ) - is False - ) - assert ( - _match_random_value( - outputter_to_console, - variable_array_3, - variable_array_3, - ) - is True - ) - # NaN to non-NaN is NOT considered a match - assert ( - _match_random_value( - outputter_to_console, - variable_array_3, - variable_array_allnan, - ) - is None - ) - # NaN to NaN is considered a match - assert ( - _match_random_value( - outputter_to_console, - variable_array_allnan, - variable_array_allnan, - ) - is True - ) - - 
-def test_print_values_runs_with_no_error(ds_3dims_3vars_4coords_1group, outputter_to_console): - with does_not_raise(): - _print_sample_values( - outputter_to_console, - ds_3dims_3vars_4coords_1group, - groupname="Group1", - varname="step", - ) - - -def test_print_values_to_text_file_runs_with_no_error( - ds_3dims_3vars_4coords_1group, outputter_to_text_file, temp_test_text_file_path -): - _print_sample_values( - outputter_to_text_file, - ds_3dims_3vars_4coords_1group, - groupname="Group1", - varname="step", - ) - outputter_to_text_file._text_file_obj.close() - - comparison_variable = xr.open_dataset( - ds_3dims_3vars_4coords_1group, backend_kwargs={"group": "Group1"} - )["step"] - - with open(temp_test_text_file_path) as f: - lines = f.readlines() - assert lines[0].strip().replace("[", "").replace("]", "").split() == [ - str(round(x, 1)) for x in comparison_variable[:].values - ] - - -def test_comparison_group_no_error_for_duplicate_dataset( - ds_3dims_3vars_4coords_1group, temp_test_text_file_path -): - compare( - ds_3dims_3vars_4coords_1group, - ds_3dims_3vars_4coords_1group, - comparison_var_group="Group1", - file_text=temp_test_text_file_path, - ) - - found_expected = False - with open(temp_test_text_file_path) as f: - for line in f.readlines(): - if "Variables within specified group :" in line: - found_expected = True - - assert found_expected - - -def test_comparison_var_no_error_for_duplicate_dataset( - ds_3dims_3vars_4coords_1group, temp_test_text_file_path -): - compare( - ds_3dims_3vars_4coords_1group, - ds_3dims_3vars_4coords_1group, - comparison_var_group="Group1", - comparison_var_name="var1", - file_text=temp_test_text_file_path, - ) - - found_expected = False - with open(temp_test_text_file_path) as f: - for line in f.readlines(): - if "Sample values within specified variable :" in line: - found_expected = True - - assert found_expected - - -def test_get_vars_with_group(ds_3dims_3vars_4coords_1group): - result = 
_get_vars(ds_3dims_3vars_4coords_1group, groupname="Group1") - assert set(result) == {"step", "var1", "var2", "w"} +def test_zero_for_comparison_with_no_differences(ds_3dims_3vars_4coords_1subgroup): + assert compare(ds_3dims_3vars_4coords_1subgroup, ds_3dims_3vars_4coords_1subgroup) == 0 -def test_get_vars_error_when_no_group(ds_3dims_2vars_4coords): - with pytest.raises(OSError): - _get_vars(ds_3dims_2vars_4coords, groupname="nonexistent_group") +def test_var_properties(ds_3dims_3vars_4coords_1group): + with nc.Dataset(ds_3dims_3vars_4coords_1group) as ds: + result = _var_properties(ds.groups["Group1"], varname="step") + assert result.varname == "step" + assert result.dtype == "float32" + assert result.shape == "(3,)" + assert result.chunking == "contiguous" + assert result.attributes == {} diff --git a/tests/test_printing.py b/tests/test_printing.py index e3223a3..d36bff2 100644 --- a/tests/test_printing.py +++ b/tests/test_printing.py @@ -25,8 +25,8 @@ def test_list_of_strings_diff(outputter_to_console): - left, right, both = outputter_to_console.lists_diff( + left, right, shared = outputter_to_console.lists_diff( ["hey", "yo", "beebop"], ["what", "is", "this", "beebop"] ) - assert (left, right, both) == (2, 3, 1) + assert (left, right, shared) == (2, 3, 1) diff --git a/tests/test_sequence_operations.py b/tests/test_sequence_operations.py index d1dd4a1..216ef26 100644 --- a/tests/test_sequence_operations.py +++ b/tests/test_sequence_operations.py @@ -52,12 +52,12 @@ def test_common_elements(two_example_lists): def test_count_str_list_diffs(two_example_lists): - left, right, both = count_diffs(*two_example_lists) + left, right, shared = count_diffs(*two_example_lists) - assert (left, right, both) == (2, 4, 1) + assert (left, right, shared) == (2, 4, 1) def test_count_int_list_diffs(): - left, right, both = count_diffs([1, 9, 5, 44, 89, 13], [3, 0, 5, 1]) + left, right, shared = count_diffs([1, 9, 5, 44, 89, 13], [3, 0, 5, 1]) - assert (left, right, both) == (4, 
2, 2) + assert (left, right, shared) == (4, 2, 2)