diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ffe6baa..637e195 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -41,7 +41,7 @@ repos:
exclude_types: ["jupyter", "text"]
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.7.3
+ rev: v0.8.1
hooks:
- id: ruff
args: ["--fix", "--exit-non-zero-on-fix"]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f8abf3..46b9949 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Common Changelog](https://common-changelog.org/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [1.12.0] - 2024-12-20
+
+### Changed
+
+- Clean up docstrings, especially by removing types that are already annotated in the function signature ([#274](https://github.com/nasa/ncompare/issues/274)) ([**@danielfromearth**](https://github.com/danielfromearth))
+
+### Added
+
+- Categorize counts of differences (including attributes) in a summary ([#276](https://github.com/nasa/ncompare/pull/276)) ([**@danielfromearth**](https://github.com/danielfromearth))
+- Include dimensions in variable attribute comparisons ([#277](https://github.com/nasa/ncompare/pull/277)) ([**@danielfromearth**](https://github.com/danielfromearth))
+- Provide numerical output where zero means no differences found ([#278](https://github.com/nasa/ncompare/pull/278)) ([**@danielfromearth**](https://github.com/danielfromearth))
+
+### Removed
+
+- **Breaking:** drop support for randomized value checks, which are no longer part of the API ([#271](https://github.com/nasa/ncompare/pull/271)) ([**@danielfromearth**](https://github.com/danielfromearth))
+
+### Fixed
+
+- Catch "unsupported datatype" exception from netCDF library ([#268](https://github.com/nasa/ncompare/pull/268)) ([**@danielfromearth**](https://github.com/danielfromearth))
+
## [1.11.0] - 2024-11-14
### Added
diff --git a/README.md b/README.md
index 57a0834..3233110 100644
--- a/README.md
+++ b/README.md
@@ -32,34 +32,27 @@ _____
-Compare the structure of two NetCDF files at the command line.
+Compare the structure of two NetCDF files at the command line or via Python.
`ncompare` generates a view of the matching and non-matching groups and variables between two NetCDF datasets.
## Installing
-The latest release of `ncompare` can be installed with `mamba`, `conda` or `pip`.
-
-#### Using `mamba`
+The latest release of `ncompare` can be installed with `mamba`, `conda` or `pip`:
```bash
mamba install -c conda-forge ncompare
```
-
-#### Using `conda`
-
```bash
conda install -c conda-forge ncompare
```
-
-#### Using `pip`
-
```bash
pip install ncompare
```
-## Usage
+## Usage Examples
+### At the command line:
To compare two netCDF files,
pass the filepaths for each of the two netCDF files directly to ncompare, as follows:
@@ -77,23 +70,22 @@ a common use of _ncompare_ may look like this example:
ncompare S001G01.nc S001G01_SUBSET.nc --file-text subset_comparison.txt
```
-**A more complete usage demonstration with example output is shown in
-[this example notebook](https://ncompare.readthedocs.io/en/latest/example/ncompare-example-usage/).**
-
-### Options
-
-- `-h`, `--help` : Show this help message and exit.
-- `--file-text` [FILE_PATH]: Text file to write output to.
-- `--file-csv` [FILE_PATH]: Comma-separated values (CSV) file to write output to.
-- `--file-xlsx` [FILE_PATH]: Excel file to write output to.
-- `--only-diffs` : Only display variables and attributes that are different
-- `--no-color` : Turn off all colorized output.
-- `--show-attributes` : Include variable attributes in the table that compares variables.
-- `--show-chunks` : Include chunk sizes in the table that compares variables.
-- `-v` (`--comparison_var_name`) [VAR_NAME]: Compare specific values for this variable.
-- `-g` (`--comparison_var_group`) [VAR_GROUP]: Group that contains the `comparison_var_name`.
-- `--column-widths` [WIDTH, WIDTH, WIDTH]: Width, in number of characters, of the three columns in the comparison report
-- `--version` : Show the current version and then exit.
+### In a Python kernel:
+
+```python
+from ncompare import compare
+
+total_number_of_differences = compare(
+    "path/to/first_file.nc",
+    "path/to/second_file.nc",
+    only_diffs=True,
+    show_attributes=True,
+    show_chunks=True,
+)
+```
+
+
+### More complete usage demonstrations, with example output, are shown in [this example notebook](https://ncompare.readthedocs.io/en/latest/example/ncompare-example-usage/).
## Contributing
diff --git a/docs/example/ncompare-example-usage.ipynb b/docs/example/ncompare-example-usage.ipynb
index 1fa7d2d..689cf5b 100644
--- a/docs/example/ncompare-example-usage.ipynb
+++ b/docs/example/ncompare-example-usage.ipynb
@@ -5,7 +5,7 @@
"id": "214b2e0a-4a8a-48bb-b1f5-b457b69ece57",
"metadata": {},
"source": [
- "# Brief demonstration of `ncompare`: to compare the structure, groups, variables, and attributes of two netCDF files\""
+ "# Brief demonstration of ncompare: comparing the structure, groups, variables, and attributes of two netCDF files"
]
},
{
@@ -23,6 +23,14 @@
"cell_type": "markdown",
"id": "569c088b-0929-43c3-8d0f-6da3b6c89cce",
"metadata": {},
+ "source": [
+ "# Command Line Usage"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "14790bf9-9504-4823-9370-db738fe29355",
+ "metadata": {},
"source": [
"## `ncompare`'s command line arguments, provided by the `--help` description"
]
@@ -41,16 +49,20 @@
"cell_type": "code",
"execution_count": 1,
"id": "07e397b3-4964-4a90-b7f5-ae35185f86e5",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:10.236545Z",
+ "start_time": "2024-12-13T19:23:09.323920Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "usage: ncompare [-h] [-v COMPARISON_VAR_NAME] [-g COMPARISON_VAR_GROUP]\n",
- " [--only-diffs] [--file-text FILE_TEXT] [--file-csv FILE_CSV]\n",
- " [--file-xlsx FILE_XLSX] [--no-color] [--show-attributes]\n",
- " [--show-chunks]\n",
+ "usage: ncompare [-h] [--only-diffs] [--file-text FILE_TEXT]\n",
+ " [--file-csv FILE_CSV] [--file-xlsx FILE_XLSX] [--no-color]\n",
+ " [--show-attributes] [--show-chunks]\n",
" [--column-widths COLUMN_WIDTHS COLUMN_WIDTHS COLUMN_WIDTHS]\n",
" [--version]\n",
" nc_a nc_b\n",
@@ -59,14 +71,10 @@
"\n",
"positional arguments:\n",
" nc_a First NetCDF file\n",
- " nc_b First NetCDF file\n",
+ " nc_b Second NetCDF file\n",
"\n",
"options:\n",
" -h, --help show this help message and exit\n",
- " -v COMPARISON_VAR_NAME, --comparison_var_name COMPARISON_VAR_NAME\n",
- " Comparison variable name\n",
- " -g COMPARISON_VAR_GROUP, --comparison_var_group COMPARISON_VAR_GROUP\n",
- " Comparison variable group\n",
" --only-diffs Only display variables and attributes that are\n",
" different\n",
" --file-text FILE_TEXT\n",
@@ -115,7 +123,12 @@
"cell_type": "code",
"execution_count": 2,
"id": "136bbeb8-6d74-4373-8ef7-1c20c1fe6afc",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:10.245010Z",
+ "start_time": "2024-12-13T19:23:10.242690Z"
+ }
+ },
"outputs": [],
"source": [
"from pathlib import Path\n",
@@ -141,7 +154,12 @@
"cell_type": "code",
"execution_count": 3,
"id": "10a025b9-4483-4925-873e-6653b64441e3",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:11.688286Z",
+ "start_time": "2024-12-13T19:23:10.323624Z"
+ }
+ },
"outputs": [],
"source": [
"import requests\n",
@@ -175,7 +193,12 @@
"cell_type": "code",
"execution_count": 4,
"id": "43cace42-aa55-469e-84d9-13a45115267e",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:12.154657Z",
+ "start_time": "2024-12-13T19:23:11.695335Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -190,8 +213,6 @@
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"Root-level Groups:\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\t\u001b[36mAre all items the same? ---> True. (No items exist.)\u001b[0m\n",
- "\u001b[0m\u001b[37m\u001b[0m\u001b[90m\n",
- "No variable group selected for comparison. Skipping..\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"All variables:\u001b[0m\n",
"\u001b[0m File A File B\u001b[0m\n",
@@ -203,33 +224,47 @@
"\u001b[0m - -------------------------- --------------------------\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lat_bounds lat_bounds\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('latitude', 'nv') ('latitude', 'nv')\u001b[0m\n",
"\u001b[0m shape: (72, 2) (72, 2)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: latitude latitude\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('latitude',) ('latitude',)\u001b[0m\n",
"\u001b[0m shape: (72,) (72,)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lon_bounds lon_bounds\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('longitude', 'nv') ('longitude', 'nv')\u001b[0m\n",
"\u001b[0m shape: (144, 2) (144, 2)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: longitude longitude\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('longitude',) ('longitude',)\u001b[0m\n",
"\u001b[0m shape: (144,) (144,)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precip precip\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('time', 'latitude', 'longitude') ('time', 'latitude', 'longitude')\u001b[0m\n",
"\u001b[0m shape: (1, 72, 144) (1, 72, 144)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precip_error precip_error\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('time', 'latitude', 'longitude') ('time', 'latitude', 'longitude')\u001b[0m\n",
"\u001b[0m shape: (1, 72, 144) (1, 72, 144)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: time time\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('time',) ('time',)\u001b[0m\n",
"\u001b[0m shape: (1,) (1,)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: time_bounds time_bounds\u001b[0m\n",
"\u001b[0m dtype: float32 float32\u001b[0m\n",
+ "\u001b[0m dimensions: ('time', 'nv') ('time', 'nv')\u001b[0m\n",
"\u001b[0m shape: (1, 2) (1, 2)\u001b[0m\n",
"\u001b[0m - -------------------------- --------------------------\u001b[0m\n",
- "\u001b[0m Total number of shared items: 8 8\u001b[0m\n",
- "\u001b[0m Total number of non-shared items: 0 0\u001b[0m\n",
+ "\u001b[0m SUMMARY -------------------------- --------------------------\u001b[0m\n",
+ "\u001b[0m Total # of shared variables: 8 8\u001b[0m\n",
+ "\u001b[0m Total # of non-shared variables: 0 0\u001b[0m\n",
+ "\u001b[0m Total # of shared groups: 0 0\u001b[0m\n",
+ "\u001b[0m Total # of non-shared groups: 0 0\u001b[0m\n",
+ "\u001b[0m Total # of shared attributes: 24 24\u001b[0m\n",
+ "\u001b[0m Total # of non-shared attributes: 0 0\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\n",
"Done.\u001b[0m\n",
+ "\u001b[0m0\u001b[0m\n",
"\u001b[0m\u001b[0m"
]
}
@@ -251,7 +286,12 @@
"cell_type": "code",
"execution_count": 5,
"id": "c48728a0-1379-4a05-b7e6-ad50694510df",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:12.577663Z",
+ "start_time": "2024-12-13T19:23:12.161049Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -261,8 +301,6 @@
"\u001b[0m\u001b[37m\u001b[0mFile B: PERSIANN-CDR_v01r01_20230419_c20231030.nc\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"Root-level Dimensions:\u001b[0m\n",
- "/usr/local/Caskroom/miniconda/base/envs/ncompare-jupyter-example/lib/python3.12/site-packages/xarray/conventions.py:428: SerializationWarning: variable 'precipitation' has multiple fill values {-9999.0, -1.0}, decoding all values to NaN.\n",
- " new_vars[k] = decode_cf_variable(\n",
"\u001b[0m\u001b[37m\u001b[0m\tAre all items the same? ---> \u001b[31mFalse. (2 items are shared, out of 6 total.)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\t\u001b[31mWhich items are different?\u001b[0m\n",
"\u001b[0m File A File B\u001b[0m\n",
@@ -276,8 +314,6 @@
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"Root-level Groups:\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\t\u001b[36mAre all items the same? ---> True. (No items exist.)\u001b[0m\n",
- "\u001b[0m\u001b[37m\u001b[0m\u001b[90m\n",
- "No variable group selected for comparison. Skipping..\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"All variables:\u001b[0m\n",
"\u001b[0m File A File B\u001b[0m\n",
@@ -289,49 +325,72 @@
"\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lat\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat',)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480,)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lat_bnds\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat', 'nv')\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480, 2)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lat_bounds \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude', 'nv') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72, 2) \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: latitude \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude',) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72,) \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lon\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon',)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440,)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lon_bnds\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon', 'nv')\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440, 2)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lon_bounds \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude', 'nv') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144, 2) \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: longitude \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude',) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144,) \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precip \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precip_error \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precipitation\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'lon', 'lat')\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 1440, 480)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: time time\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 int32\u001b[0m\n",
+ "\u001b[0m dimensions: ('time',) ('time',)\u001b[0m\n",
"\u001b[0m shape: (1,) (1,)\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: time_bounds \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'nv') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 2) \u001b[0m\n",
"\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n",
- "\u001b[0m Total number of shared items: 1 1\u001b[0m\n",
- "\u001b[0m Total number of non-shared items: 7 5\u001b[0m\n",
+ "\u001b[0m SUMMARY ------------------------------ ------------------------------\u001b[0m\n",
+ "\u001b[0m Total # of shared variables: 1 1\u001b[0m\n",
+ "\u001b[0m Total # of non-shared variables: 7 5\u001b[0m\n",
+ "\u001b[0m Total # of shared groups: 0 0\u001b[0m\n",
+ "\u001b[0m Total # of non-shared groups: 0 0\u001b[0m\n",
+ "\u001b[0m Total # of shared attributes: 2 2\u001b[0m\n",
+ "\u001b[0m Total # of non-shared attributes: 22 16\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
+ "Differences were found in these attributes:\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
+ "['dimensions', 'dtype', 'shape']\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\n",
"Done.\u001b[0m\n",
- "\u001b[0m\u001b[0m\u001b[0m"
+ "\u001b[0m50\u001b[0m\n",
+ "\u001b[0m\u001b[0m"
]
}
],
@@ -351,7 +410,12 @@
"cell_type": "code",
"execution_count": 6,
"id": "1dd4c51a-394c-4569-b8b1-053743e63cb9",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:12.990375Z",
+ "start_time": "2024-12-13T19:23:12.583132Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -361,8 +425,6 @@
"\u001b[0m\u001b[37m\u001b[0mFile B: PERSIANN-CDR_v01r01_20230419_c20231030.nc\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"Root-level Dimensions:\u001b[0m\n",
- "/usr/local/Caskroom/miniconda/base/envs/ncompare-jupyter-example/lib/python3.12/site-packages/xarray/conventions.py:428: SerializationWarning: variable 'precipitation' has multiple fill values {-9999.0, -1.0}, decoding all values to NaN.\n",
- " new_vars[k] = decode_cf_variable(\n",
"\u001b[0m\u001b[37m\u001b[0m\tAre all items the same? ---> \u001b[31mFalse. (2 items are shared, out of 6 total.)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\t\u001b[31mWhich items are different?\u001b[0m\n",
"\u001b[0m File A File B\u001b[0m\n",
@@ -376,8 +438,6 @@
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"Root-level Groups:\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\t\u001b[36mAre all items the same? ---> True. (No items exist.)\u001b[0m\n",
- "\u001b[0m\u001b[37m\u001b[0m\u001b[90m\n",
- "No variable group selected for comparison. Skipping..\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
"All variables:\u001b[0m\n",
"\u001b[0m File A File B\u001b[0m\n",
@@ -389,6 +449,7 @@
"\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lat\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat',)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480,)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mbounds: lat_bnds\u001b[0m\n",
@@ -399,15 +460,18 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_min: -60.0\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lat_bnds\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lat', 'nv')\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (480, 2)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lat_bounds \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude', 'nv') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72, 2) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mcomment: latitude values at the north and south bounds of each pixel. \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: latitude \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('latitude',) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (72,) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31maxis: Y \u001b[0m\n",
@@ -418,6 +482,7 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [-90.0, 90.0, ...] \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lon\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon',)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440,)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mbounds: lon_bnds\u001b[0m\n",
@@ -428,15 +493,18 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_min: 0.0\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lon_bnds\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('lon', 'nv')\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1440, 2)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: lon_bounds \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude', 'nv') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144, 2) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mcomment: longitude values at the west and east bounds of each pixel. \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: longitude \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('longitude',) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (144,) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31maxis: X \u001b[0m\n",
@@ -447,6 +515,7 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [0.0, 360.0, ...] \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precip \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mcell_methods: area: mean time: mean \u001b[0m\n",
@@ -458,6 +527,7 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [0.0, 100.0, ...] \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precip_error \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'latitude', 'longitude') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 72, 144) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mcoordinates: time latitude longitude \u001b[0m\n",
@@ -467,6 +537,7 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_range: [0.0, 100.0, ...] \u001b[0m\n",
"\u001b[0m -----VARIABLE-----: precipitation\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'lon', 'lat')\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 1440, 480)\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: [1, 1440, 480]\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31m_FillValue: -1.0\u001b[0m\n",
@@ -479,6 +550,7 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mvalid_min: 0.0\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: time time\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 int32\u001b[0m\n",
+ "\u001b[0m dimensions: ('time',) ('time',)\u001b[0m\n",
"\u001b[0m shape: (1,) (1,)\u001b[0m\n",
"\u001b[0m chunksize: contiguous contiguous\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31maxis: T \u001b[0m\n",
@@ -489,15 +561,26 @@
"\u001b[0m\u001b[37m\u001b[0m \u001b[31munits: days since 1970-01-01 00:00:00 0:00 days since 1979-01-01 0:0:0\u001b[0m\n",
"\u001b[0m -----VARIABLE-----: time_bounds \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mdtype: float32 \u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m \u001b[31mdimensions: ('time', 'nv') \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mshape: (1, 2) \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mchunksize: contiguous \u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m \u001b[31mcomment: time bounds for each time value \u001b[0m\n",
"\u001b[0m - ------------------------------ ------------------------------\u001b[0m\n",
- "\u001b[0m Total number of shared items: 1 1\u001b[0m\n",
- "\u001b[0m Total number of non-shared items: 7 5\u001b[0m\n",
+ "\u001b[0m SUMMARY ------------------------------ ------------------------------\u001b[0m\n",
+ "\u001b[0m Total # of shared variables: 1 1\u001b[0m\n",
+ "\u001b[0m Total # of non-shared variables: 7 5\u001b[0m\n",
+ "\u001b[0m Total # of shared groups: 0 0\u001b[0m\n",
+ "\u001b[0m Total # of non-shared groups: 0 0\u001b[0m\n",
+ "\u001b[0m Total # of shared attributes: 5 5\u001b[0m\n",
+ "\u001b[0m Total # of non-shared attributes: 60 42\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
+ "Differences were found in these attributes:\u001b[0m\n",
+ "\u001b[0m\u001b[37m\u001b[0m\u001b[94m\n",
+ "['_FillValue', 'axis', 'bounds', 'calendar', 'cell_method', 'cell_methods', 'chunksize', 'comment', 'coordinates', 'dimensions', 'dtype', 'long_name', 'missing_value', 'shape', 'standard_name', 'units', 'valid_max', 'valid_min', 'valid_range']\u001b[0m\n",
"\u001b[0m\u001b[37m\u001b[0m\n",
"Done.\u001b[0m\n",
- "\u001b[0m\u001b[0m\u001b[0m"
+ "\u001b[0m114\u001b[0m\n",
+ "\u001b[0m\u001b[0m"
]
}
],
@@ -505,6 +588,235 @@
"! ncompare --show-attributes --show-chunks --column-widths 33 30 30 {file_names[0]} {file_names[2]}"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "7e1344892c25806a",
+ "metadata": {},
+ "source": [
+ "# Python Package Usage Example\n",
+ "----"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "525f98b5cbb923",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:13.326162Z",
+ "start_time": "2024-12-13T19:23:12.996001Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from ncompare import compare"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "f3363c6630447104",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-12-13T19:23:13.428938Z",
+ "start_time": "2024-12-13T19:23:13.330402Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "File A: gpcp_v02r03_monthly_d202301_c20230411.nc\n",
+ "File B: PERSIANN-CDR_v01r01_20230419_c20231030.nc\n",
+ "\n",
+ "Root-level Dimensions:\n",
+ "\tAre all items the same? ---> False. (2 items are shared, out of 6 total.)\n",
+ "\tWhich items are different?\n",
+ " File A File B\n",
+ " #00 ------------------------------ ------------------('lat', 480)\n",
+ " #01 --------------('latitude', 72) ------------------------------\n",
+ " #02 ------------------------------ -----------------('lon', 1440)\n",
+ " #03 ------------('longitude', 144) ------------------------------\n",
+ "\n",
+ "Root-level Groups:\n",
+ "\tAre all items the same? ---> True. (No items exist.)\n",
+ "\n",
+ "All variables:\n",
+ " File A File B\n",
+ " All Variables \n",
+ " - ------------------------------ ------------------------------\n",
+ " \n",
+ " GROUP #00 -----------------------------/ -----------------------------/\n",
+ " num variables in group: 8 6\n",
+ " - ------------------------------ ------------------------------\n",
+ " -----VARIABLE-----: lat\n",
+ " dtype: float32\n",
+ " dimensions: ('lat',)\n",
+ " shape: (480,)\n",
+ " chunksize: contiguous\n",
+ " bounds: lat_bnds\n",
+ " long_name: latitude\n",
+ " standard_name: latitude\n",
+ " units: degrees_north\n",
+ " valid_max: 60.0\n",
+ " valid_min: -60.0\n",
+ " -----VARIABLE-----: lat_bnds\n",
+ " dtype: float32\n",
+ " dimensions: ('lat', 'nv')\n",
+ " shape: (480, 2)\n",
+ " chunksize: contiguous\n",
+ " -----VARIABLE-----: lat_bounds \n",
+ " dtype: float32 \n",
+ " dimensions: ('latitude', 'nv') \n",
+ " shape: (72, 2) \n",
+ " chunksize: contiguous \n",
+ " comment: latitude values at the north and south bounds of each pixel. \n",
+ " -----VARIABLE-----: latitude \n",
+ " dtype: float32 \n",
+ " dimensions: ('latitude',) \n",
+ " shape: (72,) \n",
+ " chunksize: contiguous \n",
+ " axis: Y \n",
+ " bounds: lat_bounds \n",
+ " long_name: Latitude \n",
+ " standard_name: latitude \n",
+ " units: degrees_north \n",
+ " valid_range: [-90.0, 90.0, ...] \n",
+ " -----VARIABLE-----: lon\n",
+ " dtype: float32\n",
+ " dimensions: ('lon',)\n",
+ " shape: (1440,)\n",
+ " chunksize: contiguous\n",
+ " bounds: lon_bnds\n",
+ " long_name: longitude\n",
+ " standard_name: longitude\n",
+ " units: degrees_east\n",
+ " valid_max: 360.0\n",
+ " valid_min: 0.0\n",
+ " -----VARIABLE-----: lon_bnds\n",
+ " dtype: float32\n",
+ " dimensions: ('lon', 'nv')\n",
+ " shape: (1440, 2)\n",
+ " chunksize: contiguous\n",
+ " -----VARIABLE-----: lon_bounds \n",
+ " dtype: float32 \n",
+ " dimensions: ('longitude', 'nv') \n",
+ " shape: (144, 2) \n",
+ " chunksize: contiguous \n",
+ " comment: longitude values at the west and east bounds of each pixel. \n",
+ " -----VARIABLE-----: longitude \n",
+ " dtype: float32 \n",
+ " dimensions: ('longitude',) \n",
+ " shape: (144,) \n",
+ " chunksize: contiguous \n",
+ " axis: X \n",
+ " bounds: lon_bounds \n",
+ " long_name: Longitude \n",
+ " standard_name: longitude \n",
+ " units: degrees_east \n",
+ " valid_range: [0.0, 360.0, ...] \n",
+ " -----VARIABLE-----: precip \n",
+ " dtype: float32 \n",
+ " dimensions: ('time', 'latitude', 'longitude') \n",
+ " shape: (1, 72, 144) \n",
+ " chunksize: contiguous \n",
+ " cell_methods: area: mean time: mean \n",
+ " coordinates: time latitude longitude \n",
+ " long_name: NOAA Climate Data Record (CDR) of GPCP Monthly Satellite-Gauge Combined Precipitation \n",
+ " missing_value: -9999.0 \n",
+ " standard_name: precipitation amount \n",
+ " units: mm/day \n",
+ " valid_range: [0.0, 100.0, ...] \n",
+ " -----VARIABLE-----: precip_error \n",
+ " dtype: float32 \n",
+ " dimensions: ('time', 'latitude', 'longitude') \n",
+ " shape: (1, 72, 144) \n",
+ " chunksize: contiguous \n",
+ " coordinates: time latitude longitude \n",
+ " long_name: NOAA CDR of GPCP Satellite-Gauge Combined Precipitation Error \n",
+ " missing_value: -9999.0 \n",
+ " units: mm/day \n",
+ " valid_range: [0.0, 100.0, ...] \n",
+ " -----VARIABLE-----: precipitation\n",
+ " dtype: float32\n",
+ " dimensions: ('time', 'lon', 'lat')\n",
+ " shape: (1, 1440, 480)\n",
+ " chunksize: [1, 1440, 480]\n",
+ " _FillValue: -1.0\n",
+ " cell_method: sum\n",
+ " long_name: NOAA Climate Data Record of PERSIANN-CDR daily precipitation\n",
+ " missing_value: -9999.0\n",
+ " standard_name: precipitation_amount\n",
+ " units: mm\n",
+ " valid_max: 999999.0\n",
+ " valid_min: 0.0\n",
+ " -----VARIABLE-----: time time\n",
+ " dtype: float32 int32\n",
+ " axis: T \n",
+ " bounds: time_bounds \n",
+ " calendar: Gregorian \n",
+ " units: days since 1970-01-01 00:00:00 0:00 days since 1979-01-01 0:0:0\n",
+ " -----VARIABLE-----: time_bounds \n",
+ " dtype: float32 \n",
+ " dimensions: ('time', 'nv') \n",
+ " shape: (1, 2) \n",
+ " chunksize: contiguous \n",
+ " comment: time bounds for each time value \n",
+ " - ------------------------------ ------------------------------\n",
+ " SUMMARY ------------------------------ ------------------------------\n",
+ " Total # of shared variables: 1 1\n",
+ " Total # of non-shared variables: 7 5\n",
+ " Total # of shared groups: 0 0\n",
+ " Total # of non-shared groups: 0 0\n",
+ " Total # of shared attributes: 5 5\n",
+ " Total # of non-shared attributes: 60 42\n",
+ "\n",
+ "Differences were found in these attributes:\n",
+ "\n",
+ "['_FillValue', 'axis', 'bounds', 'calendar', 'cell_method', 'cell_methods', 'chunksize', 'comment', 'coordinates', 'dimensions', 'dtype', 'long_name', 'missing_value', 'shape', 'standard_name', 'units', 'valid_max', 'valid_min', 'valid_range']\n",
+ "\n",
+ "Done.\n"
+ ]
+ }
+ ],
+ "source": [
+ "total_number_of_differences = compare(\n",
+ " file_names[0],\n",
+ " file_names[2],\n",
+ " only_diffs=True,\n",
+ " show_attributes=True,\n",
+ " show_chunks=True,\n",
+ " column_widths=[33, 30, 30],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8b3f36e-c4ed-4392-81a8-fedcbd6fa3c8",
+ "metadata": {},
+ "source": [
+    "The value returned by `compare` is the total number of differences (across _variables_, _groups_, and _attributes_):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d93e1ef4-4bf9-4e48-b166-d42ca2ff42e9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "114\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(total_number_of_differences)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "dccb326d-3b47-4d0f-b96d-93577d3e7c54",
@@ -516,9 +828,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "ncompare-jupyter-example",
+ "display_name": "temp_for_ncompare_test",
"language": "python",
- "name": "ncompare-jupyter-example"
+ "name": "temp_for_ncompare_test"
},
"language_info": {
"codemirror_mode": {
@@ -530,7 +842,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.0"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/ncompare/__init__.py b/ncompare/__init__.py
index 0fc1ce4..e2def66 100644
--- a/ncompare/__init__.py
+++ b/ncompare/__init__.py
@@ -24,3 +24,15 @@
# See the License for the specific language governing permissions and limitations under the License.
"""Main code for comparing NetCDF files."""
+
+from importlib.metadata import version
+
+from .core import (
+ compare,
+)
+
+__all__ = [
+ "compare",
+]
+
+__version__ = version("ncompare")
diff --git a/ncompare/console.py b/ncompare/console.py
index e530043..8aad37a 100755
--- a/ncompare/console.py
+++ b/ncompare/console.py
@@ -43,16 +43,14 @@ def _cli(args: Optional[Sequence[str]]) -> argparse.Namespace:
Parameters
----------
- args : None or list[str]
- if None, then argparse will use sys.argv[1:]
+ args
+ if None, then argparse will use `sys.argv[1:]`
"""
parser = argparse.ArgumentParser(
description="Compare the variables contained within two different NetCDF datasets"
)
parser.add_argument("nc_a", help="First NetCDF file")
parser.add_argument("nc_b", help="Second NetCDF file")
- parser.add_argument("-v", "--comparison_var_name", help="Comparison variable name")
- parser.add_argument("-g", "--comparison_var_group", help="Comparison variable group")
parser.add_argument(
"--only-diffs",
action="store_true",
@@ -110,10 +108,11 @@ def main() -> None: # pragma: no cover
delattr(args, "version")
try:
- compare(**vars(args))
+ total_diff_count = compare(**vars(args))
except Exception: # pylint: disable=broad-exception-caught
print(traceback.format_exc())
sys.exit(1)
+ print(total_diff_count)
sys.exit(0) # a clean, no-issue, exit
diff --git a/ncompare/core.py b/ncompare/core.py
index 66be6a5..9d6ca0b 100644
--- a/ncompare/core.py
+++ b/ncompare/core.py
@@ -29,23 +29,23 @@
"""Compare the structure of two NetCDF files."""
-import random
-import traceback
+import warnings
from collections import namedtuple
from collections.abc import Iterable, Iterator
from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, TypedDict, Union
import netCDF4
-import numpy as np
import xarray as xr
-from colorama import Fore, Style
+from colorama import Fore
-from ncompare.printing import Outputter
+from ncompare.printing import Outputter, SummaryDifferenceKeys
from ncompare.sequence_operations import common_elements, count_diffs
from ncompare.utils import ensure_valid_path_exists, ensure_valid_path_with_suffix
-VarProperties = namedtuple("VarProperties", "varname, variable, dtype, shape, chunking, attributes")
+VarProperties = namedtuple(
+ "VarProperties", "varname, variable, dtype, dimensions, shape, chunking, attributes"
+)
GroupPair = namedtuple(
"GroupPair",
@@ -54,11 +54,17 @@
)
+class SummaryDifferencesDict(TypedDict):
+ shared: int
+ left: int
+ right: int
+ both: int
+ difference_types: set
+
+
def compare(
nc_a: Union[str, Path],
nc_b: Union[str, Path],
- comparison_var_group: Optional[str] = None,
- comparison_var_name: Optional[str] = None,
only_diffs: bool = False,
no_color: bool = False,
show_chunks: bool = False,
@@ -67,39 +73,36 @@ def compare(
file_csv: Union[str, Path] = "",
file_xlsx: Union[str, Path] = "",
column_widths: Optional[tuple[Union[int, str], Union[int, str], Union[int, str]]] = None,
-) -> None:
- """Compare the variables contained within two different NetCDF datasets.
+) -> int:
+ """Compare the variables contained within two different netCDF datasets.
Parameters
----------
- nc_a : str
- filepath to NetCDF4
- nc_b : str
- filepath to NetCDF4
- comparison_var_group : str, optional
- The name of a group which contains a desired comparison variable
- comparison_var_name : str, optional
- The name of a variable for which we want to compare values
- only_diffs : bool, optional
+ nc_a
+ filepath to the first netCDF
+ nc_b
+ filepath to the second netCDF
+ only_diffs
Whether to show only the variables/attributes that are different between the two files
- no_color : bool, default False
+ no_color
Turns off the use of ANSI escape character sequences for producing colored terminal text
- show_chunks : bool, default False
+ show_chunks
Whether to include data chunk sizes in the displayed comparison of variables
- show_attributes : bool, default False
+ show_attributes
Whether to include variable attributes in the displayed comparison of variables
- file_text : str
+ file_text
filepath destination to save captured text output as a TXT file.
- file_csv : str
+ file_csv
filepath destination to save comparison output as comma-separated values (CSV).
- file_xlsx : str
+ file_xlsx
filepath destination to save comparison output as an Excel workbook.
- column_widths : tuple[int | str, int | str, int | str], optional
+ column_widths
the width in number of characters for each column of the comparison table.
Returns
-------
- None
+ int
+ total number of differences found (across variables, groups, and attributes)
"""
# Check the validity of paths.
nc_a = ensure_valid_path_exists(nc_a)
@@ -123,12 +126,10 @@ def compare(
out.print(f"File B: {nc_b}")
# Start the comparison process.
- run_through_comparisons(
+ total_diff_count = run_through_comparisons(
out,
nc_a,
nc_b,
- comparison_var_group=comparison_var_group,
- comparison_var_name=comparison_var_name,
show_chunks=show_chunks,
show_attributes=show_attributes,
)
@@ -141,27 +142,35 @@ def compare(
out.print("\nDone.", colors=False)
+ return total_diff_count
+
def run_through_comparisons(
out: Outputter,
nc_a: Union[str, Path],
nc_b: Union[str, Path],
- comparison_var_group: Optional[str],
- comparison_var_name: Optional[str],
show_chunks: bool,
show_attributes: bool,
-) -> None:
- """Execute a series of comparisons between two NetCDF files.
+) -> int:
+ """Execute a series of comparisons between two netCDF files.
Parameters
----------
out
+ instance of Outputter
nc_a
+ path to the first netCDF file
nc_b
- comparison_var_group
- comparison_var_name
+ path to the second netCDF file
show_chunks
+ whether to include data chunk sizes in the displayed comparison of variables
show_attributes
+ whether to include variable attributes in the displayed comparison of variables
+
+ Returns
+ -------
+ int
+ total number of differences found (across variables, groups, and attributes)
"""
# Show the dimensions of each file and evaluate differences.
out.print(Fore.LIGHTBLUE_EX + "\nRoot-level Dimensions:", add_to_history=True)
@@ -175,90 +184,12 @@ def run_through_comparisons(
list_b = _get_groups(nc_b)
_, _, _ = out.lists_diff(list_a, list_b)
- if comparison_var_group:
- # Show the variables within the selected group.
- out.print(
- Fore.LIGHTBLUE_EX + f"\nVariables within specified group <{comparison_var_group}>:",
- add_to_history=True,
- )
- vlist_a = _get_vars(nc_a, comparison_var_group)
- vlist_b = _get_vars(nc_b, comparison_var_group)
- _, _, _ = out.lists_diff(vlist_a, vlist_b)
-
- # TODO: Remove comparison variable/val?
- if comparison_var_name:
- try:
- # Print the first part of the values array for the selected variable.
- out.print(
- Fore.LIGHTBLUE_EX
- + f"\nSample values within specified variable <{comparison_var_name}>:"
- )
- _print_sample_values(out, nc_a, comparison_var_group, comparison_var_name)
- _print_sample_values(out, nc_b, comparison_var_group, comparison_var_name)
- # compare_sample_values(nc_a, nc_b, groupname=comparison_var_group, varname=comparison_var_name)
-
- out.print(
- Fore.LIGHTBLUE_EX
- + f"\nChecking multiple random values within specified variable <{comparison_var_name}>:"
- )
- compare_multiple_random_values(
- out,
- nc_a,
- nc_b,
- groupname=comparison_var_group,
- varname=comparison_var_name,
- )
-
- except KeyError:
- out.print(
- Style.BRIGHT
- + Fore.RED
- + f"\nError when comparing values for variable <{comparison_var_name}> "
- f"in group <{comparison_var_group}>."
- )
- out.print(traceback.format_exc())
- out.print("\n")
- else:
- out.print(Fore.LIGHTBLACK_EX + "\nNo variable selected for comparison. Skipping..")
- else:
- out.print(Fore.LIGHTBLACK_EX + "\nNo variable group selected for comparison. Skipping..")
-
out.print(Fore.LIGHTBLUE_EX + "\nAll variables:", add_to_history=True)
- _, _, _ = compare_two_nc_files(
+ total_diff_count = compare_two_nc_files(
out, nc_a, nc_b, show_chunks=show_chunks, show_attributes=show_attributes
)
-
-def compare_multiple_random_values(
- out: Outputter,
- nc_a: Union[str, Path],
- nc_b: Union[str, Path],
- groupname: str,
- varname: str,
- num_comparisons: int = 100,
-):
- """Iterate through N random samples, and evaluate whether the differences exceed a threshold."""
- # Open a variable from each NetCDF
- nc_var_a = xr.open_dataset(nc_a, backend_kwargs={"group": groupname}).variables[varname]
- nc_var_b = xr.open_dataset(nc_b, backend_kwargs={"group": groupname}).variables[varname]
-
- num_mismatches = 0
- for _ in range(num_comparisons):
- match_result = _match_random_value(out, nc_var_a, nc_var_b)
- if match_result is True:
- out.print(".", colors=False, end="")
- elif match_result is None:
- out.print("n", colors=False, end="")
- num_mismatches += 1
- else:
- out.print("x", colors=False, end="")
- num_mismatches += 1
-
- if num_mismatches > 0:
- out.print(Fore.RED + f" {num_mismatches} mismatches, out of {num_comparisons} samples.")
- else:
- out.print(Fore.CYAN + " No mismatches.")
- out.print("Done.", colors=False)
+ return total_diff_count
def walk_common_groups_tree(
@@ -271,10 +202,14 @@ def walk_common_groups_tree(
Parameters
----------
- top_a_name : str
- top_a : netCDF4.Dataset or netCDF4.Group
- top_b_name : str
- top_b : netCDF4.Dataset or netCDF4.Group
+ top_a_name
+ name of the first group or dataset
+ top_a
+ the first group or dataset
+ top_b_name
+ name of the second group or dataset
+ top_b
+ the second group or dataset
Yields
------
@@ -325,11 +260,50 @@ def compare_two_nc_files(
nc_two: Union[str, Path],
show_chunks: bool = False,
show_attributes: bool = False,
-) -> tuple[int, int, int]:
- """Go through all groups and all variables, and show them side by side - whether they align and where they don't."""
- out.side_by_side(" ", "File A", "File B", force_display_even_if_same=True)
+) -> int:
+ """Go through all groups and all variables, and show them side by side,
+ highlighting whether they align and where they don't.
- num_var_diffs = {"left": 0, "right": 0, "both": 0}
+ Parameters
+ ----------
+ out
+ instance of Outputter
+ nc_one
+ path to the first dataset
+ nc_two
+ path to the second dataset
+ show_chunks
+ whether to include chunks alongside variables
+ show_attributes
+ whether to include variable attributes
+
+ Returns
+ -------
+ int
+ total number of differences found (across variables, groups, and attributes)
+ """
+ out.side_by_side(" ", "File A", "File B", force_display_even_if_same=True)
+ num_group_diffs: SummaryDifferencesDict = {
+ "shared": 0,
+ "left": 0,
+ "right": 0,
+ "both": 0,
+ "difference_types": set(),
+ }
+ num_var_diffs: SummaryDifferencesDict = {
+ "shared": 0,
+ "left": 0,
+ "right": 0,
+ "both": 0,
+ "difference_types": set(),
+ }
+ num_attribute_diffs: SummaryDifferencesDict = {
+ "shared": 0,
+ "left": 0,
+ "right": 0,
+ "both": 0,
+ "difference_types": set(),
+ }
with netCDF4.Dataset(nc_one) as nc_a, netCDF4.Dataset(nc_two) as nc_b:
out.side_by_side(
"All Variables", " ", " ", dash_line=False, force_display_even_if_same=True
@@ -345,12 +319,20 @@ def compare_two_nc_files(
"/",
group_counter,
num_var_diffs,
+ num_attribute_diffs,
show_attributes,
show_chunks,
)
group_counter += 1
for group_pair in walk_common_groups_tree("", nc_a, "", nc_b):
+ if group_pair.group_a_name == "":
+ num_group_diffs["right"] += 1
+ elif group_pair.group_b_name == "":
+ num_group_diffs["left"] += 1
+ else:
+ num_group_diffs["shared"] += 1
+
_print_group_details_side_by_side(
out,
group_pair.group_a,
@@ -359,45 +341,74 @@ def compare_two_nc_files(
group_pair.group_b_name,
group_counter,
num_var_diffs,
+ num_attribute_diffs,
show_attributes,
show_chunks,
)
group_counter += 1
out.side_by_side("-", "-", "-", dash_line=True, force_display_even_if_same=True)
+ out.side_by_side("SUMMARY", "-", "-", dash_line=True, force_display_even_if_same=True)
+
+ _print_summary_count_comparison_side_by_side(out, "variable", num_var_diffs)
+ _print_summary_count_comparison_side_by_side(out, "group", num_group_diffs)
+ _print_summary_count_comparison_side_by_side(out, "attribute", num_attribute_diffs)
+ if num_attribute_diffs["difference_types"]:
+ out.print(
+ Fore.LIGHTBLUE_EX + "\nDifferences were found in these attributes:", add_to_history=True
+ )
+ out.print(
+ Fore.LIGHTBLUE_EX + f"\n{sorted(num_attribute_diffs['difference_types'])}",
+ add_to_history=True,
+ )
+
+ # Return the total number of differences; thus, zero means no differences were found.
+ total_diff_count = sum(
+ [x["left"] + x["right"] for x in [num_var_diffs, num_group_diffs, num_attribute_diffs]]
+ )
+
+ return total_diff_count
+
+
+def _print_summary_count_comparison_side_by_side(
+ out: Outputter,
+ item_type: str,
+ diff_dictionary: SummaryDifferencesDict,
+) -> None:
+ # Tally up instances where there were non-empty entries on both left and right sides.
+ diff_dictionary["left"] += diff_dictionary["both"]
+ diff_dictionary["right"] += diff_dictionary["both"]
+
out.side_by_side(
- "Total number of shared items:",
- str(num_var_diffs["both"]),
- str(num_var_diffs["both"]),
+ f"Total # of shared {item_type}s:",
+ str(diff_dictionary["shared"]),
+ str(diff_dictionary["shared"]),
force_display_even_if_same=True,
)
+
out.side_by_side(
- "Total number of non-shared items:",
- str(num_var_diffs["left"]),
- str(num_var_diffs["right"]),
+ f"Total # of non-shared {item_type}s:",
+ str(diff_dictionary["left"]),
+ str(diff_dictionary["right"]),
force_display_even_if_same=True,
)
- return num_var_diffs["left"], num_var_diffs["right"], num_var_diffs["both"]
def _print_group_details_side_by_side(
- out,
+ out: Outputter,
group_a: Union[netCDF4.Dataset, netCDF4.Group],
group_a_name: str,
group_b: Union[netCDF4.Dataset, netCDF4.Group],
group_b_name: str,
group_counter: int,
- num_var_diffs: dict,
+ num_var_diffs: SummaryDifferencesDict,
+ num_attribute_diffs: SummaryDifferencesDict,
show_attributes: bool,
show_chunks: bool,
) -> None:
+ """Align and display group details side by side."""
out.side_by_side(
- " ",
- " ",
- " ",
- dash_line=False,
- highlight_diff=False,
- force_display_even_if_same=True,
+ " ", " ", " ", dash_line=False, highlight_diff=False, force_display_even_if_same=True
)
out.side_by_side(
f"GROUP #{group_counter:02}",
@@ -425,10 +436,10 @@ def _print_group_details_side_by_side(
out.side_by_side("-", "-", "-", dash_line=True, force_display_even_if_same=True)
# Count differences between the lists of variables in this group.
- left, right, both = count_diffs(vars_a_sorted, vars_b_sorted)
+ left, right, shared = count_diffs(vars_a_sorted, vars_b_sorted)
num_var_diffs["left"] += left
num_var_diffs["right"] += right
- num_var_diffs["both"] += both
+ num_var_diffs["shared"] += shared
# Go through each variable in the current group.
for variable_pair in common_elements(vars_a_sorted, vars_b_sorted):
@@ -437,47 +448,47 @@ def _print_group_details_side_by_side(
out,
_var_properties(group_a, variable_pair[1]),
_var_properties(group_b, variable_pair[2]),
+ num_attribute_diffs,
show_chunks=show_chunks,
show_attributes=show_attributes,
)
def _print_var_properties_side_by_side(
- out,
+ out: Outputter,
v_a: VarProperties,
v_b: VarProperties,
+ num_attribute_diffs: SummaryDifferencesDict,
show_chunks: bool = False,
show_attributes: bool = False,
-):
- # Gather all variable property pairs first, before printing, so we can decide whether to highlight the variable header
+) -> None:
+ """Align and display variable properties side by side."""
+ # Gather all variable property pairs first, before printing,
+ # so we can decide whether to highlight the variable header.
pairs_to_check_and_show = [
(v_a.dtype, v_b.dtype),
+ (v_a.dimensions, v_b.dimensions),
(v_a.shape, v_b.shape),
]
if show_chunks:
pairs_to_check_and_show.append((v_a.chunking, v_b.chunking))
if show_attributes:
- for attr_a_key, attr_a, attr_b_key, attr_b in get_and_check_variable_attributes(v_a, v_b):
- # Check whether attr_a_key is empty, because it might be if the variable doesn't exist in File A.
+ for attr_a_key, attr_a, attr_b_key, attr_b in _get_and_check_variable_attributes(v_a, v_b):
+ # Check whether attr_a_key is empty,
+ # because it might be if the variable doesn't exist in File A.
pairs_to_check_and_show.append((attr_a, attr_b))
# Scale Factor
- scale_factor_pair = get_and_check_variable_scale_factor(v_a, v_b)
+ scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b)
if scale_factor_pair:
pairs_to_check_and_show.append((scale_factor_pair[0], scale_factor_pair[1]))
- # print(f"pairs_to_check_and_show === {pairs_to_check_and_show}")
there_is_a_difference = False
for pair in pairs_to_check_and_show:
if pair[0] != pair[1]:
there_is_a_difference = True
break
- # Variable name
- # header_color = None
- # if there_is_a_difference:
- # header_color = Fore.RED
-
- # If all attributes are the same, and keep-only-diffs is set -> DONT print
+ # If all attributes are the same, and keep-only-diffs is set -> DON'T print
# If all attributes are the same, and keep-only-diffs is NOT set -> print
# If some attributes are different -> print no matter else
if there_is_a_difference or (not out.keep_only_diffs):
@@ -489,31 +500,38 @@ def _print_var_properties_side_by_side(
force_display_even_if_same=True,
)
- # Data type
- out.side_by_side("dtype:", v_a.dtype, v_b.dtype, highlight_diff=True)
- # Shape
- out.side_by_side("shape:", v_a.shape, v_b.shape, highlight_diff=True)
+ # Go through each attribute, show differences, and add differences to running tally.
+ def _var_attribute_side_by_side(attribute_name, attribute_a, attribute_b):
+ diff_condition: SummaryDifferenceKeys = out.side_by_side(
+ f"{attribute_name}:", attribute_a, attribute_b, highlight_diff=True
+ )
+ num_attribute_diffs[diff_condition] += 1
+ if diff_condition in ("left", "right", "both"):
+ num_attribute_diffs["difference_types"].add(attribute_name)
+
+ _var_attribute_side_by_side("dtype", v_a.dtype, v_b.dtype)
+ _var_attribute_side_by_side("dimensions", v_a.dimensions, v_b.dimensions)
+ _var_attribute_side_by_side("shape", v_a.shape, v_b.shape)
# Chunking
if show_chunks:
- out.side_by_side("chunksize:", v_a.chunking, v_b.chunking, highlight_diff=True)
- # Attributes
- if show_attributes:
- for attr_a_key, attr_a, attr_b_key, attr_b in get_and_check_variable_attributes(v_a, v_b):
- # Check whether attr_a_key is empty, because it might be if the variable doesn't exist in File A.
- out.side_by_side(
- f"{attr_a_key if attr_a_key else attr_b_key}:",
- attr_a,
- attr_b,
- highlight_diff=True,
- )
-
+ _var_attribute_side_by_side("chunksize", v_a.chunking, v_b.chunking)
# Scale Factor
- scale_factor_pair = get_and_check_variable_scale_factor(v_a, v_b)
+ scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b)
if scale_factor_pair:
- out.side_by_side("sf:", scale_factor_pair[0], scale_factor_pair[1], highlight_diff=True)
+ _var_attribute_side_by_side("scale_factor", scale_factor_pair[0], scale_factor_pair[1])
+ # Other attributes
+ if show_attributes:
+ for attr_a_key, attr_a, attr_b_key, attr_b in _get_and_check_variable_attributes(v_a, v_b):
+ # Check whether attr_a_key is empty,
+ # because it might be if the variable doesn't exist in File A.
+ attribute_key = attr_a_key if attr_a_key else attr_b_key
+ _var_attribute_side_by_side(attribute_key, attr_a, attr_b)
-def get_and_check_variable_scale_factor(v_a, v_b) -> Union[None, tuple[str, str]]:
+def _get_and_check_variable_scale_factor(
+ v_a: VarProperties, v_b: VarProperties
+) -> Union[None, tuple[str, str]]:
+ """Get a string representation of the scale factor for two variables."""
if getattr(v_a.variable, "scale_factor", None):
sf_a = v_a.variable.scale_factor
else:
@@ -528,7 +546,10 @@ def get_and_check_variable_scale_factor(v_a, v_b) -> Union[None, tuple[str, str]
return None
-def get_and_check_variable_attributes(v_a, v_b):
+def _get_and_check_variable_attributes(
+ v_a: VarProperties, v_b: VarProperties
+) -> Iterator[tuple[str, str, str, str]]:
+ """Go through and yield each attribute pair for two variables."""
# Get the name of attributes if they exist
attrs_a_names = []
if v_a.attributes:
@@ -548,91 +569,45 @@ def _var_properties(group: Union[netCDF4.Dataset, netCDF4.Group], varname: str)
Parameters
----------
- group : `netCDF4.Dataset` or netCDF4.Group object
- varname : str
+ group
+ a dataset or group of variables
+ varname
+ the name of the variable
Returns
-------
- netCDF4.Variable
- str
- dtype of variable values
- tuple
- shape of variable
- tuple
- chunking
- dict
- any other attributes for this variable
+ VarProperties
"""
if varname:
the_variable = group.variables[varname]
v_dtype = str(the_variable.dtype)
+ v_dimensions = str(the_variable.dimensions)
v_shape = str(the_variable.shape).strip()
v_chunking = str(the_variable.chunking()).strip()
- v_attributes = {name: getattr(the_variable, name) for name in the_variable.ncattrs()}
+
+ v_attributes = {}
+ for name in the_variable.ncattrs():
+ try:
+ v_attributes[name] = the_variable.getncattr(name)
+ except KeyError as key_err:
+ # Added this check because of "unsupported datatype" error that prevented
+ # fully running comparisons on S5P_OFFL_L1B_IR_UVN collections.
+ v_attributes[name] = f"netCDF error: {str(key_err)}"
else:
the_variable = None
v_dtype = ""
+ v_dimensions = ""
v_shape = ""
v_chunking = ""
v_attributes = None
- return VarProperties(varname, the_variable, v_dtype, v_shape, v_chunking, v_attributes)
-
-
-def _match_random_value(
- out: Outputter, nc_var_a: xr.Variable, nc_var_b: xr.Variable, thresh: float = 1e-6
-) -> Union[bool, None]:
- """Check whether a randomly selected data point matches between two variables.
-
- Returns
- -------
- None or bool
- None if data point is null for one and only one of the variables
- True if values match
- False if the difference exceeds the given threshold
- """
- # Get a random indexer
- rand_index = []
- for dim_length in nc_var_a.shape:
- rand_index.append(random.randint(0, dim_length - 1))
- rand_index_tuple = tuple(rand_index)
-
- # Get the values from each variable
- value_a = nc_var_a.values[rand_index_tuple]
- value_b = nc_var_b.values[rand_index_tuple]
-
- # Check whether null
- if np.isnan(value_a) and np.isnan(value_b):
- return True
- elif np.isnan(value_a) or np.isnan(value_b):
- return None
-
- # Evaluate difference between values
- diff = value_b - value_a
- if abs(diff) > thresh:
- out.print()
- out.print(Fore.RED + f"Difference exceeded threshold (diff == {diff}")
- out.print(f"var shape: {nc_var_a.shape}", colors=False)
- out.print(f"indices: {rand_index_tuple}", colors=False)
- out.print(f"value a: {value_a}", colors=False)
- out.print(f"value b: {value_b}", colors=False, end="\n\n")
- return False
-
- return True
-
-
-def _print_sample_values(out: Outputter, nc_filepath, groupname: str, varname: str) -> None:
- comparison_variable = xr.open_dataset(nc_filepath, backend_kwargs={"group": groupname})[varname]
- vector_of_values = comparison_variable.values.flatten()
- n_values = len(vector_of_values)
- if n_values > 100:
- sample_length = 100
- else:
- sample_length = n_values
- out.print(str(vector_of_values[:sample_length]), colors=False)
+ return VarProperties(
+ varname, the_variable, v_dtype, v_dimensions, v_shape, v_chunking, v_attributes
+ )
def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) -> str:
+ """Get a string representation of the attribute value."""
if attribute_key and (attribute_key in varprops.attributes):
attr = varprops.attributes[attribute_key]
if isinstance(attr, Iterable) and not isinstance(attr, (str, float)):
@@ -647,27 +622,21 @@ def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) ->
return ""
-def _get_vars(nc_filepath: Union[str, Path], groupname: str) -> list:
- try:
- grp = xr.open_dataset(nc_filepath, backend_kwargs={"group": groupname})
- except OSError as err:
- print(f"\nError occurred when attempting to open group within <{nc_filepath}>.\n")
- raise err
- grp_varlist = sorted(list(grp.variables.keys())) # type:ignore[type-var]
-
- return grp_varlist
-
-
def _get_groups(nc_filepath: Union[str, Path]) -> list:
+ """Get a list of groups from a netCDF."""
with netCDF4.Dataset(nc_filepath) as dataset:
groups_list = list(dataset.groups.keys())
return groups_list
def _get_dims(nc_filepath: Union[str, Path]) -> list:
+ """Get a list of dimensions from a netCDF."""
+
def __get_dim_list(decode_times=True):
- with xr.open_dataset(nc_filepath, decode_times=decode_times) as dataset:
- return list(dataset.sizes.items())
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ with xr.open_dataset(nc_filepath, decode_times=decode_times) as dataset:
+ return list(dataset.sizes.items())
try:
dims_list = __get_dim_list()
diff --git a/ncompare/printing.py b/ncompare/printing.py
index 339c513..5224b30 100644
--- a/ncompare/printing.py
+++ b/ncompare/printing.py
@@ -29,9 +29,9 @@
import csv
import re
import warnings
-from collections.abc import Iterable
+from collections.abc import Iterable, Iterator
from pathlib import Path
-from typing import Optional, TextIO, Union
+from typing import Literal, Optional, TextIO, Union
import colorama
import openpyxl
@@ -41,6 +41,8 @@
from ncompare.sequence_operations import common_elements, count_diffs
+SummaryDifferenceKeys = Literal["shared", "left", "right", "both"]
+
# Set up regex remover of ANSI color escape sequences
# From
ansi_escape = re.compile(
@@ -77,6 +79,15 @@ def __init__(
Parameters
----------
keep_print_history
+ whether to keep printing history or not - used for file output
+ keep_only_diffs
+ whether to keep and print only comparisons that show differences
+ no_color
+ whether to turn off colorized output
+ text_file
+ optional path to a text file to write output to
+ column_widths
+ optional tuple of column widths to use for printing
"""
# Parse the print history option.
self._keep_print_history = keep_print_history
@@ -140,10 +151,10 @@ def print(
Parameters
----------
- string : str
- colors : bool
+ string
+ colors
If False, ANSI colors will be turned off.
- add_to_history : bool
+ add_to_history
print_args
Additional keyword arguments that are passed to the standard Python print() function.
"""
@@ -211,7 +222,7 @@ def side_by_side(
highlight_diff=False,
force_display_even_if_same=False,
force_color=None,
- ) -> None:
+ ) -> SummaryDifferenceKeys:
"""Print three strings on one line, with customized formatting and an optional marker in the fourth column.
Parameters
@@ -219,8 +230,18 @@ def side_by_side(
str_a
str_b
str_c
- dash_line : bool, default False
- highlight_diff : bool, default False
+ dash_line
+ highlight_diff
+ force_display_even_if_same
+ force_color
+
+ Returns
+ -------
+ str
+ "shared" if str_b == str_c,
+ "left" if they differ and only str_b is non-empty,
+ "right" if they differ and only str_c is non-empty, and
+ "both" if both are non-empty and differ from each other.
"""
are_different = str_b != str_c
if (
@@ -228,7 +249,7 @@ def side_by_side(
and (are_different is False)
and self.keep_only_diffs
):
- return None
+ return "shared" # nothing is displayed: the two strings are equal (possibly both empty) and only differences are kept.
# If the 'b' and 'c' strings are different (or force_color is set),
# then change the font of 'a' to the color red.
@@ -265,6 +286,15 @@ def side_by_side(
self._add_to_history(str_a, str_b, str_c, str_marker)
+ if not are_different:
+ return "shared"
+ elif str_b and (not str_c):
+ return "left" # there is only a non-empty string on the left side.
+ elif str_c and (not str_b):
+ return "right" # there is only a non-empty string on the right side.
+ else:
+ return "both" # there are non-empty strings on both sides, and they are not equal.
+
def side_by_side_list_diff(self, list_a: list, list_b: list, counter_prefix="") -> None:
"""Print the items from two lists vertically (i.e., side by side), with customized formatting.
@@ -289,7 +319,24 @@ def lists_diff(
list_b: list,
ignore_order: bool = True,
) -> tuple[int, int, int]:
- """Compare two lists and state whether there are differences."""
+ """Compare two lists and state whether there are differences.
+
+ Parameters
+ ----------
+ list_a
+ list_b
+ ignore_order
+
+ Returns
+ -------
+ tuple
+ int
+ number of entries only present in the first (left) list
+ int
+ number of entries only present in the second (right) list
+ int
+ number of entries shared among the first (left) and second (right) list
+ """
set_a, set_b = set(list_a), set(list_b)
s_union = set_a.union(set_b)
@@ -311,11 +358,11 @@ def lists_diff(
self.print(msg + " (No items exist.)", add_to_history=True)
return 0, 0, len(list_a)
- # If contents are not the same, continue...
- left, right, both = count_diffs(list_a, list_b)
+ # If contents are different, continue...
+ left, right, shared = count_diffs(list_a, list_b)
self.print(
"\t" + "Are all items the same? ---> " + Fore.RED + f"{str(contents_are_same)}."
- f" ({_item_is_or_are(both)} shared, out of {len(s_union)} total.)",
+ f" ({_item_is_or_are(shared)} shared, out of {len(s_union)} total.)",
add_to_history=True,
)
@@ -328,9 +375,9 @@ def lists_diff(
self.side_by_side_list_diff(list_a, list_b)
self.side_by_side("Number of non-shared items:", str(left), str(right))
- return left, right, both
+ return left, right, shared
- def write_history_to_csv(self, filename: Union[str, Path] = "test.csv"):
+ def write_history_to_csv(self, filename: Union[str, Path] = "test.csv") -> None:
"""Save the line history that's been stored to a CSV file."""
headers = ["Info", "File A", "File B", "Other marks"]
with open(filename, "w", encoding="utf-8") as target:
@@ -338,8 +385,8 @@ def write_history_to_csv(self, filename: Union[str, Path] = "test.csv"):
writer.writerow(headers)
writer.writerows(self._line_history)
- def write_history_to_excel(self, filename: Union[str, Path] = "test.xlsx"):
- """Save the line history that's been stored to a CSV file."""
+ def write_history_to_excel(self, filename: Union[str, Path] = "test.xlsx") -> None:
+ """Save the line history that's been stored to an Excel file."""
workbook = openpyxl.Workbook()
sheet = workbook.active
@@ -363,14 +410,14 @@ def write_history_to_excel(self, filename: Union[str, Path] = "test.xlsx"):
workbook.save(filename)
-def _item_is_or_are(count):
+def _item_is_or_are(count) -> str:
if count == 1:
return f"{count} item is"
return f"{count} items are"
-def _excel_red_cells(data, sheet):
+def _excel_red_cells(data, sheet) -> Iterator:
"""Stylize cells in Excel with a red font."""
for cell in data:
cell = Cell(sheet, column="A", row=1, value=cell)
@@ -378,7 +425,7 @@ def _excel_red_cells(data, sheet):
yield cell
-def _excel_bold_underline_cells(data, sheet):
+def _excel_bold_underline_cells(data, sheet) -> Iterator:
"""Stylize cells in Excel with a bold and underlined font."""
for cell in data:
cell = Cell(sheet, column="A", row=1, value=cell)
diff --git a/ncompare/sequence_operations.py b/ncompare/sequence_operations.py
index 5baef68..5d2e246 100644
--- a/ncompare/sequence_operations.py
+++ b/ncompare/sequence_operations.py
@@ -73,7 +73,7 @@ def common_elements(
def count_diffs(
list_a: Union[list[str], list[int], str], list_b: Union[list[str], list[int], str]
) -> tuple[int, int, int]:
- """Count how many elements are either uniquely in one list or the other, or in both.
+ """Count how many elements are either uniquely in one list or the other, or shared.
Note
----
@@ -96,6 +96,6 @@ def count_diffs(
# The number of differences is computed.
left = len(set_a - set_b)
right = len(set_b - set_a)
- both = len(set_a.intersection(set_b))
+ shared = len(set_a.intersection(set_b))
- return left, right, both
+ return left, right, shared
diff --git a/poetry.lock b/poetry.lock
index 55630c1..cfb84e1 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1167,13 +1167,13 @@ pygments = ">2.12.0"
[[package]]
name = "mkdocs-material"
-version = "9.5.44"
+version = "9.5.47"
description = "Documentation that simply works"
optional = false
python-versions = ">=3.8"
files = [
- {file = "mkdocs_material-9.5.44-py3-none-any.whl", hash = "sha256:47015f9c167d58a5ff5e682da37441fc4d66a1c79334bfc08d774763cacf69ca"},
- {file = "mkdocs_material-9.5.44.tar.gz", hash = "sha256:f3a6c968e524166b3f3ed1fb97d3ed3e0091183b0545cedf7156a2a6804c56c0"},
+ {file = "mkdocs_material-9.5.47-py3-none-any.whl", hash = "sha256:53fb9c9624e7865da6ec807d116cd7be24b3cb36ab31b1d1d1a9af58c56009a2"},
+ {file = "mkdocs_material-9.5.47.tar.gz", hash = "sha256:fc3b7a8e00ad896660bd3a5cc12ca0cb28bdc2bcbe2a946b5714c23ac91b0ede"},
]
[package.dependencies]
@@ -1812,13 +1812,13 @@ extra = ["pygments (>=2.12)"]
[[package]]
name = "pytest"
-version = "8.3.3"
+version = "8.3.4"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"},
- {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"},
+ {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"},
+ {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"},
]
[package.dependencies]
@@ -2339,29 +2339,29 @@ files = [
[[package]]
name = "ruff"
-version = "0.7.3"
+version = "0.8.1"
description = "An extremely fast Python linter and code formatter, written in Rust."
optional = false
python-versions = ">=3.7"
files = [
- {file = "ruff-0.7.3-py3-none-linux_armv6l.whl", hash = "sha256:34f2339dc22687ec7e7002792d1f50712bf84a13d5152e75712ac08be565d344"},
- {file = "ruff-0.7.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:fb397332a1879b9764a3455a0bb1087bda876c2db8aca3a3cbb67b3dbce8cda0"},
- {file = "ruff-0.7.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:37d0b619546103274e7f62643d14e1adcbccb242efda4e4bdb9544d7764782e9"},
- {file = "ruff-0.7.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d59f0c3ee4d1a6787614e7135b72e21024875266101142a09a61439cb6e38a5"},
- {file = "ruff-0.7.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44eb93c2499a169d49fafd07bc62ac89b1bc800b197e50ff4633aed212569299"},
- {file = "ruff-0.7.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d0242ce53f3a576c35ee32d907475a8d569944c0407f91d207c8af5be5dae4e"},
- {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6b6224af8b5e09772c2ecb8dc9f3f344c1aa48201c7f07e7315367f6dd90ac29"},
- {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c50f95a82b94421c964fae4c27c0242890a20fe67d203d127e84fbb8013855f5"},
- {file = "ruff-0.7.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7f3eff9961b5d2644bcf1616c606e93baa2d6b349e8aa8b035f654df252c8c67"},
- {file = "ruff-0.7.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8963cab06d130c4df2fd52c84e9f10d297826d2e8169ae0c798b6221be1d1d2"},
- {file = "ruff-0.7.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:61b46049d6edc0e4317fb14b33bd693245281a3007288b68a3f5b74a22a0746d"},
- {file = "ruff-0.7.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:10ebce7696afe4644e8c1a23b3cf8c0f2193a310c18387c06e583ae9ef284de2"},
- {file = "ruff-0.7.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3f36d56326b3aef8eeee150b700e519880d1aab92f471eefdef656fd57492aa2"},
- {file = "ruff-0.7.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5d024301109a0007b78d57ab0ba190087b43dce852e552734ebf0b0b85e4fb16"},
- {file = "ruff-0.7.3-py3-none-win32.whl", hash = "sha256:4ba81a5f0c5478aa61674c5a2194de8b02652f17addf8dfc40c8937e6e7d79fc"},
- {file = "ruff-0.7.3-py3-none-win_amd64.whl", hash = "sha256:588a9ff2fecf01025ed065fe28809cd5a53b43505f48b69a1ac7707b1b7e4088"},
- {file = "ruff-0.7.3-py3-none-win_arm64.whl", hash = "sha256:1713e2c5545863cdbfe2cbce21f69ffaf37b813bfd1fb3b90dc9a6f1963f5a8c"},
- {file = "ruff-0.7.3.tar.gz", hash = "sha256:e1d1ba2e40b6e71a61b063354d04be669ab0d39c352461f3d789cac68b54a313"},
+ {file = "ruff-0.8.1-py3-none-linux_armv6l.whl", hash = "sha256:fae0805bd514066f20309f6742f6ee7904a773eb9e6c17c45d6b1600ca65c9b5"},
+ {file = "ruff-0.8.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b8a4f7385c2285c30f34b200ca5511fcc865f17578383db154e098150ce0a087"},
+ {file = "ruff-0.8.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cd054486da0c53e41e0086e1730eb77d1f698154f910e0cd9e0d64274979a209"},
+ {file = "ruff-0.8.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2029b8c22da147c50ae577e621a5bfbc5d1fed75d86af53643d7a7aee1d23871"},
+ {file = "ruff-0.8.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2666520828dee7dfc7e47ee4ea0d928f40de72056d929a7c5292d95071d881d1"},
+ {file = "ruff-0.8.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:333c57013ef8c97a53892aa56042831c372e0bb1785ab7026187b7abd0135ad5"},
+ {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:288326162804f34088ac007139488dcb43de590a5ccfec3166396530b58fb89d"},
+ {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b12c39b9448632284561cbf4191aa1b005882acbc81900ffa9f9f471c8ff7e26"},
+ {file = "ruff-0.8.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:364e6674450cbac8e998f7b30639040c99d81dfb5bbc6dfad69bc7a8f916b3d1"},
+ {file = "ruff-0.8.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b22346f845fec132aa39cd29acb94451d030c10874408dbf776af3aaeb53284c"},
+ {file = "ruff-0.8.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b2f2f7a7e7648a2bfe6ead4e0a16745db956da0e3a231ad443d2a66a105c04fa"},
+ {file = "ruff-0.8.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:adf314fc458374c25c5c4a4a9270c3e8a6a807b1bec018cfa2813d6546215540"},
+ {file = "ruff-0.8.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a885d68342a231b5ba4d30b8c6e1b1ee3a65cf37e3d29b3c74069cdf1ee1e3c9"},
+ {file = "ruff-0.8.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d2c16e3508c8cc73e96aa5127d0df8913d2290098f776416a4b157657bee44c5"},
+ {file = "ruff-0.8.1-py3-none-win32.whl", hash = "sha256:93335cd7c0eaedb44882d75a7acb7df4b77cd7cd0d2255c93b28791716e81790"},
+ {file = "ruff-0.8.1-py3-none-win_amd64.whl", hash = "sha256:2954cdbe8dfd8ab359d4a30cd971b589d335a44d444b6ca2cb3d1da21b75e4b6"},
+ {file = "ruff-0.8.1-py3-none-win_arm64.whl", hash = "sha256:55873cc1a473e5ac129d15eccb3c008c096b94809d693fc7053f588b67822737"},
+ {file = "ruff-0.8.1.tar.gz", hash = "sha256:3583db9a6450364ed5ca3f3b4225958b24f78178908d5c4bc0f46251ccca898f"},
]
[[package]]
@@ -2436,22 +2436,22 @@ files = [
[[package]]
name = "tornado"
-version = "6.4.1"
+version = "6.4.2"
description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed."
optional = false
python-versions = ">=3.8"
files = [
- {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"},
- {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14"},
- {file = "tornado-6.4.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e20b9113cd7293f164dc46fffb13535266e713cdb87bd2d15ddb336e96cfc4"},
- {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ae50a504a740365267b2a8d1a90c9fbc86b780a39170feca9bcc1787ff80842"},
- {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613bf4ddf5c7a95509218b149b555621497a6cc0d46ac341b30bd9ec19eac7f3"},
- {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:25486eb223babe3eed4b8aecbac33b37e3dd6d776bc730ca14e1bf93888b979f"},
- {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:454db8a7ecfcf2ff6042dde58404164d969b6f5d58b926da15e6b23817950fc4"},
- {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a02a08cc7a9314b006f653ce40483b9b3c12cda222d6a46d4ac63bb6c9057698"},
- {file = "tornado-6.4.1-cp38-abi3-win32.whl", hash = "sha256:d9a566c40b89757c9aa8e6f032bcdb8ca8795d7c1a9762910c722b1635c9de4d"},
- {file = "tornado-6.4.1-cp38-abi3-win_amd64.whl", hash = "sha256:b24b8982ed444378d7f21d563f4180a2de31ced9d8d84443907a0a64da2072e7"},
- {file = "tornado-6.4.1.tar.gz", hash = "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9"},
+ {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e828cce1123e9e44ae2a50a9de3055497ab1d0aeb440c5ac23064d9e44880da1"},
+ {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803"},
+ {file = "tornado-6.4.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a017d239bd1bb0919f72af256a970624241f070496635784d9bf0db640d3fec"},
+ {file = "tornado-6.4.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c36e62ce8f63409301537222faffcef7dfc5284f27eec227389f2ad11b09d946"},
+ {file = "tornado-6.4.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca9eb02196e789c9cb5c3c7c0f04fb447dc2adffd95265b2c7223a8a615ccbf"},
+ {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:304463bd0772442ff4d0f5149c6f1c2135a1fae045adf070821c6cdc76980634"},
+ {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:c82c46813ba483a385ab2a99caeaedf92585a1f90defb5693351fa7e4ea0bf73"},
+ {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:932d195ca9015956fa502c6b56af9eb06106140d844a335590c1ec7f5277d10c"},
+ {file = "tornado-6.4.2-cp38-abi3-win32.whl", hash = "sha256:2876cef82e6c5978fde1e0d5b1f919d756968d5b4282418f3146b79b58556482"},
+ {file = "tornado-6.4.2-cp38-abi3-win_amd64.whl", hash = "sha256:908b71bf3ff37d81073356a5fadcc660eb10c1476ee6e2725588626ce7e5ca38"},
+ {file = "tornado-6.4.2.tar.gz", hash = "sha256:92bad5b4746e9879fd7bf1eb21dce4e3fc5128d71601f80005afa39237ad620b"},
]
[[package]]
@@ -2618,4 +2618,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
-content-hash = "23343a1e303b445f5cc352380af173128fbd50b66b83105dc4b4eb1dba38aab8"
+content-hash = "96eeae493d6ddb99129f57dd7f124315c3312da2d07bdd1dfa37f912b804145c"
diff --git a/pyproject.toml b/pyproject.toml
index ccc0c32..54b05ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ncompare"
-version = "1.11.0"
+version = "1.12.0rc5"
description = "Compare the structure of two NetCDF files at the command line"
authors = ["Daniel Kaufman "]
readme = "README.md"
@@ -32,7 +32,7 @@ openpyxl = ">=3.1.2"
[tool.poetry.group.dev.dependencies]
pytest = ">=7.4.2,<9.0.0"
-ruff = ">=0.5.1,<0.7.4"
+ruff = ">=0.5.1,<0.8.2"
mypy = ">=1.5.1"
pytest-cov = ">=4.1,<7.0"
mkdocs = ">=1.5.3"
diff --git a/tests/data/a-b_test_golden_file.csv b/tests/data/a-b_test_golden_file.csv
index 749bec1..b3bb60a 100644
--- a/tests/data/a-b_test_golden_file.csv
+++ b/tests/data/a-b_test_golden_file.csv
@@ -1,103 +1,121 @@
-Info,File A,File B,Other marks
-Root-level Dimensions:
- Are all items the same? ---> True.
-Root-level Groups:
- Are all items the same? ---> True.
-All variables:
- ,File A,File B,
-All Variables, , ,
--,-,-,
- , , ,
-GROUP #00,/,/,
-num variables in group:,2,2,
--,-,-,
------VARIABLE-----:,conditions,conditions,
-dtype:,int32,int32,
-shape:,"(2,)","(2,)",
-chunksize:,contiguous,contiguous,
------VARIABLE-----:,time,time,
-dtype:,float64,float64,
-shape:,"(5,)","(5,)",
-chunksize:,[512],[512],
-calendar:,gregorian,gregorian,
-coordinates:,time,time,
-long_name:,Time of observation,Time of observation,
-units:,hours since 0001-01-01 00:00:00.0,hours since 0001-01-01 00:00:00.0,
- , , ,
-GROUP #01,/Data,/Data,
-num variables in group:,1,1,
--,-,-,
------VARIABLE-----:,level,level,
-dtype:,int32,int32,
-shape:,"(2,)","(2,)",
-chunksize:,[1024],[1024],
-units:,hPa,hPa,
- , , ,
-GROUP #02,/Position,/Position,
-num variables in group:,2,2,
--,-,-,
------VARIABLE-----:,lat,lat,
-dtype:,float32,float32,
-shape:,"(3,)","(2,)",***
-chunksize:,contiguous,contiguous,
-units:,degrees north,degrees north,
------VARIABLE-----:,lon,lon,
-dtype:,float32,float32,
-shape:,"(4,)","(2,)",***
-chunksize:,contiguous,contiguous,
-units:,degrees east,degrees east,
- , , ,
-GROUP #03,/Statistics,/Statistics,
-num variables in group:,1,1,
--,-,-,
------VARIABLE-----:,mean_value,,
-dtype:,float32,,***
-shape:,"(5,)",,***
-chunksize:,[1024],,***
-coordinates:,time,,***
-long_name:,average value for each time,,***
------VARIABLE-----:,,std_value,
-dtype:,,float32,***
-shape:,,"(5,)",***
-chunksize:,,[1024],***
-coordinates:,,time,***
-long_name:,,standard deviation value for each time,***
- , , ,
-GROUP #04,/Data/Products,/Data/Products,
-num variables in group:,1,1,
--,-,-,
------VARIABLE-----:,temp,temp,
-dtype:,float32,float32,
-shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",***
-chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",***
-long_name:,temperature,temperature,
-units:,K,Kelvin,***
- , , ,
-GROUP #05,/Data/Quality,/Data/Quality,
-num variables in group:,1,1,
--,-,-,
------VARIABLE-----:,quality_flag,quality_flag,
-dtype:,int32,int32,
-shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",***
-chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",***
-units:,unitless,unitless,
- , , ,
-GROUP #06,,/Data/Supplemental,
-num variables in group:,0,1,***
--,-,-,
------VARIABLE-----:,,supplemental_flag,
-dtype:,,int32,***
-shape:,,"(5, 2)",***
-chunksize:,,"[1, 2]",***
-units:,,unitless,***
- , , ,
-GROUP #07,,/Data/Supplemental/Details,
-num variables in group:,0,1,***
--,-,-,
------VARIABLE-----:,,condition_details,
-dtype:,,float64,***
-shape:,,"(2,)",***
-chunksize:,,contiguous,***
--,-,-,
-Total number of shared items:,7,7,
-Total number of non-shared items:,1,3,
+Info,File A,File B,Other marks
+Root-level Dimensions:
+ Are all items the same? ---> True.
+Root-level Groups:
+ Are all items the same? ---> True.
+All variables:
+ ,File A,File B,
+All Variables, , ,
+-,-,-,
+ , , ,
+GROUP #00,/,/,
+num variables in group:,2,2,
+-,-,-,
+-----VARIABLE-----:,conditions,conditions,
+dtype:,int32,int32,
+dimensions:,"('conditions',)","('conditions',)",
+shape:,"(2,)","(2,)",
+chunksize:,contiguous,contiguous,
+-----VARIABLE-----:,time,time,
+dtype:,float64,float64,
+dimensions:,"('time',)","('time',)",
+shape:,"(5,)","(5,)",
+chunksize:,[512],[512],
+calendar:,gregorian,gregorian,
+coordinates:,time,time,
+long_name:,Time of observation,Time of observation,
+units:,hours since 0001-01-01 00:00:00.0,hours since 0001-01-01 00:00:00.0,
+ , , ,
+GROUP #01,/Data,/Data,
+num variables in group:,1,1,
+-,-,-,
+-----VARIABLE-----:,level,level,
+dtype:,int32,int32,
+dimensions:,"('level',)","('level',)",
+shape:,"(2,)","(2,)",
+chunksize:,[1024],[1024],
+units:,hPa,hPa,
+ , , ,
+GROUP #02,/Position,/Position,
+num variables in group:,2,2,
+-,-,-,
+-----VARIABLE-----:,lat,lat,
+dtype:,float32,float32,
+dimensions:,"('lat',)","('lat',)",
+shape:,"(3,)","(2,)",***
+chunksize:,contiguous,contiguous,
+units:,degrees north,degrees north,
+-----VARIABLE-----:,lon,lon,
+dtype:,float32,float32,
+dimensions:,"('lon',)","('lon',)",
+shape:,"(4,)","(2,)",***
+chunksize:,contiguous,contiguous,
+units:,degrees east,degrees east,
+ , , ,
+GROUP #03,/Statistics,/Statistics,
+num variables in group:,1,1,
+-,-,-,
+-----VARIABLE-----:,mean_value,,
+dtype:,float32,,***
+dimensions:,"('time',)",,***
+shape:,"(5,)",,***
+chunksize:,[1024],,***
+coordinates:,time,,***
+long_name:,average value for each time,,***
+-----VARIABLE-----:,,std_value,
+dtype:,,float32,***
+dimensions:,,"('time',)",***
+shape:,,"(5,)",***
+chunksize:,,[1024],***
+coordinates:,,time,***
+long_name:,,standard deviation value for each time,***
+ , , ,
+GROUP #04,/Data/Products,/Data/Products,
+num variables in group:,1,1,
+-,-,-,
+-----VARIABLE-----:,temp,temp,
+dtype:,float32,float32,
+dimensions:,"('time', 'level', 'lat', 'lon')","('time', 'level', 'lat', 'lon')",
+shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",***
+chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",***
+long_name:,temperature,temperature,
+units:,K,Kelvin,***
+ , , ,
+GROUP #05,/Data/Quality,/Data/Quality,
+num variables in group:,1,1,
+-,-,-,
+-----VARIABLE-----:,quality_flag,quality_flag,
+dtype:,int32,int32,
+dimensions:,"('time', 'level', 'lat', 'lon')","('time', 'level', 'lat', 'lon')",
+shape:,"(5, 2, 3, 4)","(5, 2, 2, 2)",***
+chunksize:,"[1, 1, 3, 4]","[1, 1, 2, 2]",***
+units:,unitless,unitless,
+ , , ,
+GROUP #06,,/Data/Supplemental,
+num variables in group:,0,1,***
+-,-,-,
+-----VARIABLE-----:,,supplemental_flag,
+dtype:,,int32,***
+dimensions:,,"('time', 'conditions')",***
+shape:,,"(5, 2)",***
+chunksize:,,"[1, 2]",***
+units:,,unitless,***
+ , , ,
+GROUP #07,,/Data/Supplemental/Details,
+num variables in group:,0,1,***
+-,-,-,
+-----VARIABLE-----:,,condition_details,
+dtype:,,float64,***
+dimensions:,,"('conditions',)",***
+shape:,,"(2,)",***
+chunksize:,,contiguous,***
+-,-,-,
+SUMMARY,-,-,
+Total # of shared variables:,7,7,
+Total # of non-shared variables:,1,3,
+Total # of shared groups:,5,5,
+Total # of non-shared groups:,0,2,
+Total # of shared attributes:,31,31,
+Total # of non-shared attributes:,13,22,
+Differences were found in these attributes:
+"['chunksize', 'coordinates', 'dimensions', 'dtype', 'long_name', 'shape', 'units']"
diff --git a/tests/data/a-b_test_golden_file.txt b/tests/data/a-b_test_golden_file.txt
index 54166ff..a661ee4 100644
--- a/tests/data/a-b_test_golden_file.txt
+++ b/tests/data/a-b_test_golden_file.txt
@@ -1,5 +1,5 @@
-File A: ncompare/tests/data/test_a.nc
-File B: ncompare/tests/data/test_b.nc
+File A: /Users/dkaufma3/projects/ncompare_project/2_ExperimentFolder/ncompare/tests/data/test_a.nc
+File B: /Users/dkaufma3/projects/ncompare_project/2_ExperimentFolder/ncompare/tests/data/test_b.nc
Root-level Dimensions:
Are all items the same? ---> True.
@@ -9,8 +9,6 @@ Root-level Groups:
Are all items the same? ---> True.
['Data', 'Position', 'Statistics']
-No variable group selected for comparison. Skipping..
-
All variables:
File A File B
All Variables
@@ -21,10 +19,12 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: conditions conditions
dtype: int32 int32
+ dimensions: ('conditions',) ('conditions',)
shape: (2,) (2,)
chunksize: contiguous contiguous
-----VARIABLE-----: time time
dtype: float64 float64
+ dimensions: ('time',) ('time',)
shape: (5,) (5,)
chunksize: [512] [512]
calendar: gregorian gregorian
@@ -37,6 +37,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: level level
dtype: int32 int32
+ dimensions: ('level',) ('level',)
shape: (2,) (2,)
chunksize: [1024] [1024]
units: hPa hPa
@@ -46,11 +47,13 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: lat lat
dtype: float32 float32
+ dimensions: ('lat',) ('lat',)
shape: (3,) (2,)
chunksize: contiguous contiguous
units: degrees north degrees north
-----VARIABLE-----: lon lon
dtype: float32 float32
+ dimensions: ('lon',) ('lon',)
shape: (4,) (2,)
chunksize: contiguous contiguous
units: degrees east degrees east
@@ -60,12 +63,14 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: mean_value
dtype: float32
+ dimensions: ('time',)
shape: (5,)
chunksize: [1024]
coordinates: time
long_name: average value for each time
-----VARIABLE-----: std_value
dtype: float32
+ dimensions: ('time',)
shape: (5,)
chunksize: [1024]
coordinates: time
@@ -76,6 +81,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: temp temp
dtype: float32 float32
+ dimensions: ('time', 'level', 'lat', 'lon') ('time', 'level', 'lat', 'lon')
shape: (5, 2, 3, 4) (5, 2, 2, 2)
chunksize: [1, 1, 3, 4] [1, 1, 2, 2]
long_name: temperature temperature
@@ -86,6 +92,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: quality_flag quality_flag
dtype: int32 int32
+ dimensions: ('time', 'level', 'lat', 'lon') ('time', 'level', 'lat', 'lon')
shape: (5, 2, 3, 4) (5, 2, 2, 2)
chunksize: [1, 1, 3, 4] [1, 1, 2, 2]
units: unitless unitless
@@ -95,6 +102,7 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: supplemental_flag
dtype: int32
+ dimensions: ('time', 'conditions')
shape: (5, 2)
chunksize: [1, 2]
units: unitless
@@ -104,10 +112,20 @@ All variables:
- ------------------------------------------------ ------------------------------------------------
-----VARIABLE-----: condition_details
dtype: float64
+ dimensions: ('conditions',)
shape: (2,)
chunksize: contiguous
- ------------------------------------------------ ------------------------------------------------
- Total number of shared items: 7 7
- Total number of non-shared items: 1 3
+ SUMMARY ------------------------------------------------ ------------------------------------------------
+ Total # of shared variables: 7 7
+ Total # of non-shared variables: 1 3
+ Total # of shared groups: 5 5
+ Total # of non-shared groups: 0 2
+ Total # of shared attributes: 31 31
+ Total # of non-shared attributes: 13 22
+
+Differences were found in these attributes:
+
+['chunksize', 'coordinates', 'dimensions', 'dtype', 'long_name', 'shape', 'units']
Done.
diff --git a/tests/data/a-b_test_golden_file.xlsx b/tests/data/a-b_test_golden_file.xlsx
index 5b5b49c..a5426d3 100644
Binary files a/tests/data/a-b_test_golden_file.xlsx and b/tests/data/a-b_test_golden_file.xlsx differ
diff --git a/tests/test_core.py b/tests/test_core.py
index 8d127a6..b8595d2 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -31,10 +31,12 @@
from contextlib import nullcontext as does_not_raise
-import pytest
-import xarray as xr
+import netCDF4 as nc
-from ncompare.core import _get_vars, _match_random_value, _print_sample_values, compare
+from ncompare.core import (
+ _var_properties,
+ compare,
+)
def compare_ab(a, b):
@@ -69,139 +71,15 @@ def test_no_error_compare_2groupsTo1Subgroup(
compare_ba(ds_3dims_3vars_4coords_2groups, ds_3dims_3vars_4coords_1subgroup)
-def test_matching_random_values(
- ds_3dims_2vars_4coords,
- ds_4dims_3vars_5coords,
- ds_3dims_3vars_4coords_1group,
- ds_1dim_1var_1coord,
- ds_1dim_1var_allnan_1coord,
- outputter_to_console,
-):
- variable_array_1 = xr.open_dataset(ds_3dims_2vars_4coords).variables["z1"]
- variable_array_2 = xr.open_dataset(ds_4dims_3vars_5coords).variables["z1"]
- variable_array_3 = xr.open_dataset(ds_1dim_1var_1coord).variables["z1"]
- variable_array_allnan = xr.open_dataset(ds_1dim_1var_allnan_1coord).variables["z1"]
-
- assert (
- _match_random_value(
- outputter_to_console,
- variable_array_1,
- variable_array_1,
- )
- is True
- )
- assert (
- _match_random_value(
- outputter_to_console,
- variable_array_1,
- variable_array_2,
- )
- is False
- )
- assert (
- _match_random_value(
- outputter_to_console,
- variable_array_3,
- variable_array_3,
- )
- is True
- )
- # NaN to non-NaN is NOT considered a match
- assert (
- _match_random_value(
- outputter_to_console,
- variable_array_3,
- variable_array_allnan,
- )
- is None
- )
- # NaN to NaN is considered a match
- assert (
- _match_random_value(
- outputter_to_console,
- variable_array_allnan,
- variable_array_allnan,
- )
- is True
- )
-
-
-def test_print_values_runs_with_no_error(ds_3dims_3vars_4coords_1group, outputter_to_console):
- with does_not_raise():
- _print_sample_values(
- outputter_to_console,
- ds_3dims_3vars_4coords_1group,
- groupname="Group1",
- varname="step",
- )
-
-
-def test_print_values_to_text_file_runs_with_no_error(
- ds_3dims_3vars_4coords_1group, outputter_to_text_file, temp_test_text_file_path
-):
- _print_sample_values(
- outputter_to_text_file,
- ds_3dims_3vars_4coords_1group,
- groupname="Group1",
- varname="step",
- )
- outputter_to_text_file._text_file_obj.close()
-
- comparison_variable = xr.open_dataset(
- ds_3dims_3vars_4coords_1group, backend_kwargs={"group": "Group1"}
- )["step"]
-
- with open(temp_test_text_file_path) as f:
- lines = f.readlines()
- assert lines[0].strip().replace("[", "").replace("]", "").split() == [
- str(round(x, 1)) for x in comparison_variable[:].values
- ]
-
-
-def test_comparison_group_no_error_for_duplicate_dataset(
- ds_3dims_3vars_4coords_1group, temp_test_text_file_path
-):
- compare(
- ds_3dims_3vars_4coords_1group,
- ds_3dims_3vars_4coords_1group,
- comparison_var_group="Group1",
- file_text=temp_test_text_file_path,
- )
-
- found_expected = False
- with open(temp_test_text_file_path) as f:
- for line in f.readlines():
- if "Variables within specified group :" in line:
- found_expected = True
-
- assert found_expected
-
-
-def test_comparison_var_no_error_for_duplicate_dataset(
- ds_3dims_3vars_4coords_1group, temp_test_text_file_path
-):
- compare(
- ds_3dims_3vars_4coords_1group,
- ds_3dims_3vars_4coords_1group,
- comparison_var_group="Group1",
- comparison_var_name="var1",
- file_text=temp_test_text_file_path,
- )
-
- found_expected = False
- with open(temp_test_text_file_path) as f:
- for line in f.readlines():
- if "Sample values within specified variable :" in line:
- found_expected = True
-
- assert found_expected
-
-
-def test_get_vars_with_group(ds_3dims_3vars_4coords_1group):
- result = _get_vars(ds_3dims_3vars_4coords_1group, groupname="Group1")
- assert set(result) == {"step", "var1", "var2", "w"}
+def test_zero_for_comparison_with_no_differences(ds_3dims_3vars_4coords_1subgroup):
+ assert compare(ds_3dims_3vars_4coords_1subgroup, ds_3dims_3vars_4coords_1subgroup) == 0
-def test_get_vars_error_when_no_group(ds_3dims_2vars_4coords):
- with pytest.raises(OSError):
- _get_vars(ds_3dims_2vars_4coords, groupname="nonexistent_group")
+def test_var_properties(ds_3dims_3vars_4coords_1group):
+ with nc.Dataset(ds_3dims_3vars_4coords_1group) as ds:
+ result = _var_properties(ds.groups["Group1"], varname="step")
+ assert result.varname == "step"
+ assert result.dtype == "float32"
+ assert result.shape == "(3,)"
+ assert result.chunking == "contiguous"
+ assert result.attributes == {}
diff --git a/tests/test_printing.py b/tests/test_printing.py
index e3223a3..d36bff2 100644
--- a/tests/test_printing.py
+++ b/tests/test_printing.py
@@ -25,8 +25,8 @@
def test_list_of_strings_diff(outputter_to_console):
- left, right, both = outputter_to_console.lists_diff(
+ left, right, shared = outputter_to_console.lists_diff(
["hey", "yo", "beebop"], ["what", "is", "this", "beebop"]
)
- assert (left, right, both) == (2, 3, 1)
+ assert (left, right, shared) == (2, 3, 1)
diff --git a/tests/test_sequence_operations.py b/tests/test_sequence_operations.py
index d1dd4a1..216ef26 100644
--- a/tests/test_sequence_operations.py
+++ b/tests/test_sequence_operations.py
@@ -52,12 +52,12 @@ def test_common_elements(two_example_lists):
def test_count_str_list_diffs(two_example_lists):
- left, right, both = count_diffs(*two_example_lists)
+ left, right, shared = count_diffs(*two_example_lists)
- assert (left, right, both) == (2, 4, 1)
+ assert (left, right, shared) == (2, 4, 1)
def test_count_int_list_diffs():
- left, right, both = count_diffs([1, 9, 5, 44, 89, 13], [3, 0, 5, 1])
+ left, right, shared = count_diffs([1, 9, 5, 44, 89, 13], [3, 0, 5, 1])
- assert (left, right, both) == (4, 2, 2)
+ assert (left, right, shared) == (4, 2, 2)