Skip to content

Commit

Permalink
Merge pull request #278 from nasa/feature/issue-270-provide-numerical…
Browse files Browse the repository at this point in the history
…-output-where-zero-means-no-differences-found

Feature/issue 270 provide numerical output where zero means no differences found
  • Loading branch information
danielfromearth authored Dec 13, 2024
2 parents 6ebaafa + 302c7be commit e4ecd62
Show file tree
Hide file tree
Showing 7 changed files with 336 additions and 48 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Categorize counts of differences (including attributes) in a summary ([#276](https://github.com/nasa/ncompare/pull/276)) ([**@danielfromearth**](https://github.com/danielfromearth))
- Include dimensions in variable attribute comparisons. ([#277](https://github.com/nasa/ncompare/pull/277)) ([**@danielfromearth**](https://github.com/danielfromearth))
- Provide numerical output where zero means no differences found ([#278](https://github.com/nasa/ncompare/pull/278)) ([**@danielfromearth**](https://github.com/danielfromearth))

### Changed

Expand Down
44 changes: 19 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,21 @@ Compare the structure of two NetCDF files at the command line.

## Installing

The latest release of `ncompare` can be installed with `mamba`, `conda` or `pip`.

#### Using `mamba`
The latest release of `ncompare` can be installed with `mamba`, `conda` or `pip`:

```bash
mamba install -c conda-forge ncompare
```

#### Using `conda`

```bash
conda install -c conda-forge ncompare
```

#### Using `pip`

```bash
pip install ncompare
```

## Usage
## Usage Examples

### At a command line:
To compare two netCDF files,
pass their file paths directly to `ncompare`, as follows:

Expand All @@ -77,21 +70,22 @@ a common use of _ncompare_ may look like this example:
ncompare S001G01.nc S001G01_SUBSET.nc --file-text subset_comparison.txt
```

**A more complete usage demonstration with example output is shown in
[this example notebook](https://ncompare.readthedocs.io/en/latest/example/ncompare-example-usage/).**

### Options

- `-h`, `--help` : Show this help message and exit.
- `--file-text` [FILE_PATH]: Text file to write output to.
- `--file-csv` [FILE_PATH]: Comma-separated values (CSV) file to write output to.
- `--file-xlsx` [FILE_PATH]: Excel file to write output to.
- `--only-diffs` : Only display variables and attributes that are different.
- `--no-color` : Turn off all colorized output.
- `--show-attributes` : Include variable attributes in the table that compares variables.
- `--show-chunks` : Include chunk sizes in the table that compares variables.
- `--column-widths` [WIDTH, WIDTH, WIDTH]: Width, in number of characters, of the three columns in the comparison report.
- `--version` : Show the current version and then exit.
### In a Python kernel:

```python
from ncompare import compare

total_number_of_differences = compare(
"<netcdf file 1>",
"<netcdf file 2>",
only_diffs=True,
show_attributes=True,
show_chunks=True,
)
```


### More complete usage demonstrations, with example output, are shown in [this example notebook](https://ncompare.readthedocs.io/en/latest/example/ncompare-example-usage/).

## Contributing

Expand Down
280 changes: 273 additions & 7 deletions docs/example/ncompare-example-usage.ipynb

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions ncompare/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,15 @@
# See the License for the specific language governing permissions and limitations under the License.

"""Main code for comparing NetCDF files."""

from importlib.metadata import version

from .core import (
compare,
)

__all__ = [
"compare",
]

__version__ = version("ncompare")
3 changes: 2 additions & 1 deletion ncompare/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,11 @@ def main() -> None: # pragma: no cover
delattr(args, "version")

try:
compare(**vars(args))
total_diff_count = compare(**vars(args))
except Exception: # pylint: disable=broad-exception-caught
print(traceback.format_exc())
sys.exit(1)
print(total_diff_count)
sys.exit(0) # a clean, no-issue, exit


Expand Down
40 changes: 25 additions & 15 deletions ncompare/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def compare(
file_csv: Union[str, Path] = "",
file_xlsx: Union[str, Path] = "",
column_widths: Optional[tuple[Union[int, str], Union[int, str], Union[int, str]]] = None,
) -> None:
) -> int:
"""Compare the variables contained within two different netCDF datasets.
Parameters
Expand Down Expand Up @@ -101,7 +101,8 @@ def compare(
Returns
-------
None
int
total number of differences found (across variables, groups, and attributes)
"""
# Check the validity of paths.
nc_a = ensure_valid_path_exists(nc_a)
Expand All @@ -125,7 +126,7 @@ def compare(
out.print(f"File B: {nc_b}")

# Start the comparison process.
run_through_comparisons(
total_diff_count = run_through_comparisons(
out,
nc_a,
nc_b,
Expand All @@ -141,14 +142,16 @@ def compare(

out.print("\nDone.", colors=False)

return total_diff_count


def run_through_comparisons(
out: Outputter,
nc_a: Union[str, Path],
nc_b: Union[str, Path],
show_chunks: bool,
show_attributes: bool,
) -> None:
) -> int:
"""Execute a series of comparisons between two netCDF files.
Parameters
Expand All @@ -163,6 +166,11 @@ def run_through_comparisons(
whether to include data chunk sizes in the displayed comparison of variables
show_attributes
whether to include variable attributes in the displayed comparison of variables
Returns
-------
int
total number of differences found (across variables, groups, and attributes)
"""
# Show the dimensions of each file and evaluate differences.
out.print(Fore.LIGHTBLUE_EX + "\nRoot-level Dimensions:", add_to_history=True)
Expand All @@ -177,10 +185,12 @@ def run_through_comparisons(
_, _, _ = out.lists_diff(list_a, list_b)

out.print(Fore.LIGHTBLUE_EX + "\nAll variables:", add_to_history=True)
_, _, _ = compare_two_nc_files(
total_diff_count = compare_two_nc_files(
out, nc_a, nc_b, show_chunks=show_chunks, show_attributes=show_attributes
)

return total_diff_count


def walk_common_groups_tree(
top_a_name: str,
Expand Down Expand Up @@ -250,7 +260,7 @@ def compare_two_nc_files(
nc_two: Union[str, Path],
show_chunks: bool = False,
show_attributes: bool = False,
) -> tuple[int, int, int]:
) -> int:
"""Go through all groups and all variables, and show them side by side,
highlighting whether they align and where they don't.
Expand All @@ -269,13 +279,8 @@ def compare_two_nc_files(
Returns
-------
tuple
int
number of entries only present in the first (left) dataset
int
number of entries only present in the second (right) dataset
int
number of entries shared among the first (left) and second (right) datasets
int
total number of differences found (across variables, groups, and attributes)
"""
out.side_by_side(" ", "File A", "File B", force_display_even_if_same=True)
num_group_diffs: SummaryDifferencesDict = {
Expand Down Expand Up @@ -357,7 +362,12 @@ def compare_two_nc_files(
add_to_history=True,
)

return num_var_diffs["left"], num_var_diffs["right"], num_var_diffs["shared"]
# Return the total number of differences; thus, zero means no differences were found.
total_diff_count = sum(
[x["left"] + x["right"] for x in [num_var_diffs, num_group_diffs, num_attribute_diffs]]
)

return total_diff_count


def _print_summary_count_comparison_side_by_side(
Expand Down Expand Up @@ -385,7 +395,7 @@ def _print_summary_count_comparison_side_by_side(


def _print_group_details_side_by_side(
out,
out: Outputter,
group_a: Union[netCDF4.Dataset, netCDF4.Group],
group_a_name: str,
group_b: Union[netCDF4.Dataset, netCDF4.Group],
Expand Down
4 changes: 4 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def test_no_error_compare_2groupsTo1Subgroup(
compare_ba(ds_3dims_3vars_4coords_2groups, ds_3dims_3vars_4coords_1subgroup)


def test_zero_for_comparison_with_no_differences(ds_3dims_3vars_4coords_1subgroup):
    """Comparing a dataset against itself should report zero differences."""
    same_file = ds_3dims_3vars_4coords_1subgroup
    total_diff_count = compare(same_file, same_file)
    assert total_diff_count == 0


def test_var_properties(ds_3dims_3vars_4coords_1group):
with nc.Dataset(ds_3dims_3vars_4coords_1group) as ds:
result = _var_properties(ds.groups["Group1"], varname="step")
Expand Down

0 comments on commit e4ecd62

Please sign in to comment.