From ede3f1abc21b2472da4ed441820672a536346e8e Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:14:07 -0400 Subject: [PATCH 01/32] remove requests code --- environment.yml | 3 +-- finch/processes/utils.py | 20 +++++++------------- requirements.txt | 3 +-- requirements_docs.txt | 1 - 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/environment.yml b/environment.yml index 03a322f8..98b7dbe0 100644 --- a/environment.yml +++ b/environment.yml @@ -20,8 +20,7 @@ dependencies: - psutil - python-slugify - pywps >=4.5.1 - - pyyaml - - requests>=2.32.2 + - pyyaml >=6.0.1 - scipy >=1.9.0 - sentry-sdk - siphon diff --git a/finch/processes/utils.py b/finch/processes/utils.py index bf1659e8..644cecbb 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -2,6 +2,7 @@ import json import logging import os +import urllib.request import zipfile from collections.abc import Generator, Iterable from dataclasses import dataclass, field @@ -10,11 +11,11 @@ from multiprocessing.pool import ThreadPool from pathlib import Path from typing import Callable, Deque, Optional, Union +from urllib.error import URLError import cftime import numpy as np import pandas as pd -import requests import sentry_sdk import xarray as xr import xclim @@ -34,7 +35,6 @@ ) from pywps.configuration import get_config_value from pywps.inout.outputs import MetaFile, MetaLink4 -from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema from slugify import slugify from xclim.core.indicator import build_indicator_module_from_yaml from xclim.core.utils import InputKind @@ -508,11 +508,12 @@ def is_opendap_url(url): Even then, some OpenDAP servers seem to not include the specified header... So we need to let the netCDF4 library actually open the file. """ + req = urllib.request.Request(url, method="HEAD") + try: - content_description = requests.head(url, timeout=5).headers.get( - "Content-Description" - ) - except (ConnectionError, MissingSchema, InvalidSchema): + with urllib.request.urlopen(req, timeout=5) as response: + content_description = response.headers.get("Content-Description") + except URLError: return False if content_description: @@ -520,13 +521,6 @@ def is_opendap_url(url): else: return False - # try: - # # For a non-DAP URL, this just hangs python. - # dataset = netCDF4.Dataset(url) - # except OSError: - # return False - # return dataset.disk_format in ("DAP2", "DAP4") - def single_input_or_none(inputs, identifier) -> Optional[str]: """Return first input item.""" diff --git a/requirements.txt b/requirements.txt index 4ec83878..f1bc4c81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,8 +12,7 @@ parse psutil python-slugify pywps>=4.5.1 -pyyaml -requests>=2.32.2 # CVE-2024-35195 +pyyaml>=6.0.1 scipy>=1.9.0 sentry-sdk siphon diff --git a/requirements_docs.txt b/requirements_docs.txt index c6e6b318..04a8372c 100644 --- a/requirements_docs.txt +++ b/requirements_docs.txt @@ -17,4 +17,3 @@ xarray>=2023.01.0,<2023.11.0 xclim==0.43 numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability fonttools>=4.43.0 # not directly required, pinned by Snyk to avoid a vulnerability -requests>=2.32.0 # not directly required, pinned by Snyk to avoid a vulnerability From 3c211d7ae3a4b005e20f87649edb8d1a25541e34 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:52:23 -0400 Subject: [PATCH 02/32] WIP - first stab at modernizing finch --- environment-docs.yml | 4 +-- environment.yml | 12 +++---- finch/default.cfg | 3 +- finch/processes/__init__.py | 14 ++++++-- finch/processes/utils.py | 13 ++++--- finch/processes/wps_base.py | 54 ++++++++++++++-------------- finch/processes/wps_xclim_indices.py | 2 +- requirements.txt | 17 ++++----- requirements_dev.txt | 4 --- requirements_docs.txt | 18 ++++------ requirements_prod.txt | 5 ++- 11 files changed, 74 insertions(+), 72 deletions(-) diff --git a/environment-docs.yml b/environment-docs.yml index 6977cdf6..c399e471 100644 --- a/environment-docs.yml +++ b/environment-docs.yml @@ -2,8 +2,8 @@ name: finch channels: - conda-forge dependencies: - - python >=3.8,<3.12 - - anyascii >=0.3.2 + - python >=3.9,<3.13 + - anyascii >=0.3.0 - birdy >=0.8.1 - ipython >=8.0.0 - matplotlib-base >=3.5.0 diff --git a/environment.yml b/environment.yml index 98b7dbe0..050c33e5 100644 --- a/environment.yml +++ b/environment.yml @@ -2,11 +2,11 @@ name: finch channels: - conda-forge dependencies: - - python >=3.9,<3.12 + - python >=3.9,<3.13 - pip >=24.2.0 - anyascii >=0.3.0 - - cftime - - cf_xarray >=0.6.1,<0.9.3 + - cftime >=1.4.1 + - cf_xarray >=0.9.3 - click >=8.0.0 - clisops >=0.11.0 - dask >=2023.5.1 @@ -15,7 +15,7 @@ dependencies: - h5netcdf - netcdf4 <=1.6.5 - numpy >=1.23.0 - - pandas >=1.5.3,<2.2.0 + - pandas >=2.2.0 - parse - psutil - python-slugify @@ -24,6 +24,6 @@ dependencies: - scipy >=1.9.0 - sentry-sdk - siphon - - xarray >=2023.01.0,<2023.11.0 - - xclim =0.43 # remember to match xclim version in requirements_docs.txt as well + - xarray >=2023.11.0 + - xclim =0.52.2 # remember to match xclim version in requirements_docs.txt as well - xesmf >=0.8.2 diff --git a/finch/default.cfg b/finch/default.cfg index ecb2486b..a355a2a6 100644 --- a/finch/default.cfg +++ b/finch/default.cfg @@ -19,7 +19,8 @@ language=en-US,fr subset_threads = 1 datasets_config = datasets.yml default_dataset = candcs-u6 -xclim_modules = processes/modules/humidex,processes/modules/streamflow +xclim_modules = processes/modules/humidex +; processes/modules/streamflow ; Not yet reimplemented [finch:metadata] # All fields here are added as string attributes of computed indices. diff --git a/finch/processes/__init__.py b/finch/processes/__init__.py index 30c92415..90346cf8 100644 --- a/finch/processes/__init__.py +++ b/finch/processes/__init__.py @@ -1,6 +1,5 @@ # noqa: D104 import logging -from pathlib import Path from pywps.configuration import get_config_value from xclim.core.indicator import registry as xclim_registry @@ -52,11 +51,20 @@ def filter_func(elem): not_implemented = [ "DC", + "E_SAT", "FWI", + "HURS", + "HURS_FROMDEWPOINT", + "HUSS", + "HUSS_FROMDEWPOINT", "RH", "RH_FROMDEWPOINT", - "E_SAT", - "HUSS", + "SPI", + "SPEI", + "WIND_POWER_POTENTIAL", + "WIND_PROFILE", + "WIND_SPEED_FROM_VECTOR", + "WIND_VECTOR_FROM_SPEED", ] diff --git a/finch/processes/utils.py b/finch/processes/utils.py index 644cecbb..e6cab989 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -19,9 +19,10 @@ import sentry_sdk import xarray as xr import xclim +import xclim.core.options as xclim_options import yaml from netCDF4 import num2date -from pandas.api.types import is_numeric_dtype +from pandas.api.types import is_numeric_dtype # noqa from pywps import ( FORMATS, BoundingBoxInput, @@ -36,6 +37,7 @@ from pywps.configuration import get_config_value from pywps.inout.outputs import MetaFile, MetaLink4 from slugify import slugify +from xclim.core import formatting from xclim.core.indicator import build_indicator_module_from_yaml from xclim.core.utils import InputKind @@ -259,7 +261,7 @@ def compute_indices( ) options = {name: kwds.pop(name) for name in INDICATOR_OPTIONS if name in kwds} - with xclim.core.options.set_options(**options): + with xclim_options.set_options(**options): out = func(**kwds) output_dataset = xr.Dataset( @@ -272,7 +274,8 @@ def compute_indices( "YS": "yr", "MS": "mon", "QS-DEC": "seasonal", - "AS-JUL": "seasonal", + "YS-JAN": "seasonal", + "YS-JUL": "seasonal", } output_dataset.attrs["frequency"] = conversions.get(kwds["freq"], "day") @@ -535,7 +538,7 @@ def netcdf_file_list_to_csv( output_folder, filename_prefix, csv_precision: Optional[int] = None, -) -> tuple[list[str], str]: +) -> tuple[list[Path], str]: """Write csv files for a list of netcdf files. Produces one csv file per calendar type, along with a metadata folder in the output_folder. @@ -844,7 +847,7 @@ def update_history( """ from finch import __version__ # pylint: disable=cyclic-import - merged_history = xclim.core.formatting.merge_attributes( + merged_history = formatting.merge_attributes( "history", *inputs_list, new_line="\n", diff --git a/finch/processes/wps_base.py b/finch/processes/wps_base.py index 6f151d93..05b18cf1 100644 --- a/finch/processes/wps_base.py +++ b/finch/processes/wps_base.py @@ -2,7 +2,9 @@ import io import logging from inspect import _empty as empty_default # noqa +from typing import Any, Union +import pywps.exceptions import xclim from dask.diagnostics import ProgressBar from pywps import FORMATS, ComplexInput, LiteralInput, Process @@ -11,8 +13,6 @@ from sentry_sdk import configure_scope from xclim.core.utils import InputKind -from .utils import PywpsInput - LOGGER = logging.getLogger("PYWPS") @@ -140,7 +140,7 @@ def make_xclim_indicator_process( def convert_xclim_inputs_to_pywps( params: dict, parent=None, parse_percentiles: bool = False -) -> list[PywpsInput]: +) -> tuple[list[Union[LiteralInput, ComplexInput]], list[Any]]: r"""Convert xclim indicators properties to pywps inputs. If parse_percentiles is True, percentile variables (\*_per) are dropped and replaced by @@ -184,53 +184,55 @@ def convert_xclim_inputs_to_pywps( default=default_percentiles[parent][name], ) ) - elif attrs["kind"] in [InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE]: + elif attrs.kind in [InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE]: inputs.append(make_nc_input(name)) var_names.append(name) elif name in ["freq"]: inputs.append( - make_freq(name, default=attrs["default"], abstract=attrs["description"]) + make_freq(name, default=attrs.default, abstract=attrs.description) ) elif name in ["indexer"]: inputs.append(make_month()) inputs.append(make_season()) - elif attrs["kind"] in data_types: - choices = list(attrs["choices"]) if "choices" in attrs else None - default = attrs["default"] if attrs["default"] != empty_default else None + elif attrs.kind in data_types: + choices = list(attrs.choices) if "choices" in attrs else None + default = attrs.default if attrs.default != empty_default else None inputs.append( LiteralInput( name, title=name.capitalize().replace("_", " "), - abstract=attrs["description"], - data_type=data_types[attrs["kind"]], + abstract=attrs.description, + data_type=data_types[attrs.kind], min_occurs=0, - max_occurs=1 if attrs["kind"] != InputKind.NUMBER_SEQUENCE else 99, + max_occurs=1 if attrs.kind != InputKind.NUMBER_SEQUENCE else 99, default=default, allowed_values=choices, ) ) - elif attrs["kind"] < 50: + elif attrs.kind < 50: # raise NotImplementedError(f"{parent}: {name}") - LOGGER.warning( - f"{parent}: Argument {name} of kind {attrs['kind']} is not implemented." - ) + msg = f"{parent}: Argument {name} of kind {attrs.kind} is not implemented." + LOGGER.warning(msg) return inputs, var_names def make_freq( - name, default="YS", abstract="", allowed=("YS", "MS", "QS-DEC", "AS-JUL") + name, default="YS", abstract="", allowed=("YS", "MS", "QS-DEC", "YS-JAN", "YS-JUL") ): # noqa: D103 - return LiteralInput( - name, - "Frequency", - abstract=abstract, - data_type="string", - min_occurs=0, - max_occurs=1, - default=default, - allowed_values=allowed, - ) + try: + return LiteralInput( + name, + "Frequency", + abstract=abstract, + data_type="string", + min_occurs=0, + max_occurs=1, + default=default, + allowed_values=allowed, + ) + except pywps.exceptions.InvalidParameterValue: + print(name, default, abstract, allowed) def make_nc_input(name): # noqa: D103 diff --git a/finch/processes/wps_xclim_indices.py b/finch/processes/wps_xclim_indices.py index 7230e3f5..f1486c45 100644 --- a/finch/processes/wps_xclim_indices.py +++ b/finch/processes/wps_xclim_indices.py @@ -7,7 +7,7 @@ import pandas as pd import xarray as xr from anyascii import anyascii -from pandas.api.types import is_numeric_dtype +from pandas.api.types import is_numeric_dtype # noqa from pywps.app.exceptions import ProcessError from . import wpsio diff --git a/requirements.txt b/requirements.txt index f1bc4c81..3018cb9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ anyascii>=0.3.0 -cftime -cf-xarray>=0.6.1,<0.9.3 +cf-xarray>=0.9.3 +cftime>=1.4.1 click>=8.0.0 clisops>=0.11.0 dask[complete]>=2023.5.1 geopandas!=0.13.1 -netcdf4<=1.7.1.post2 +netcdf4<=1.6.5 numpy>=1.23.0 -pandas>=1.5.3,<2.2.0 +pandas>=2.2.0 parse psutil python-slugify @@ -15,11 +15,8 @@ pywps>=4.5.1 pyyaml>=6.0.1 scipy>=1.9.0 sentry-sdk +setuptools>=71.0.0 siphon -xarray>=2023.01.0,<2023.11.0 -xclim==0.43 +xarray>=2023.11.0 +xclim==0.52.2 # remember to match xclim version in environment.yml as well xesmf>=0.6.2 -setuptools>=70.0.0 # not directly required, pinned by Snyk to avoid a vulnerability -werkzeug>=3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability -urllib3>=2.2.2 # not directly required, pinned by Snyk to avoid a vulnerability -zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/requirements_dev.txt b/requirements_dev.txt index 71f9063b..618b2bb8 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -21,11 +21,7 @@ birdhouse-birdy>=0.8.1 geojson ipython jupyter_client -jupyter-server>=2.14.1 # not directly required, pinned by Snyk to avoid a vulnerability lxml matplotlib>=3.5.0 owslib -pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability pre-commit>=3.3.0 -tornado>=6.4.1 # not directly required, pinned by Snyk to avoid a vulnerability -fonttools>=4.43.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/requirements_docs.txt b/requirements_docs.txt index 04a8372c..6cbc5230 100644 --- a/requirements_docs.txt +++ b/requirements_docs.txt @@ -1,19 +1,15 @@ anyascii>=0.3.0 birdhouse-birdy>=0.8.1 -cftime -cf-xarray>=0.6.1,<0.9.3 +cf-xarray>=0.9.3 +cftime>=1.4.1 ipython>=8.0.0 matplotlib>=3.5.0 nbsphinx>=0.9.5 -pandas>=1.5.3,<2.2.0 -pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.23.0 +pandas>=2.2.0 pywps>=4.5.1 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +setuptools>=71.0.0 sphinx>=7.0.0 sphinxcontrib-bibtex>=2.6.0 -tornado>=6.4.1 # not directly required, pinned by Snyk to avoid a vulnerability -werkzeug>=3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability -xarray>=2023.01.0,<2023.11.0 -xclim==0.43 -numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability -fonttools>=4.43.0 # not directly required, pinned by Snyk to avoid a vulnerability +xarray>=2023.11.0 +xclim==0.52.2 diff --git a/requirements_prod.txt b/requirements_prod.txt index 01574ff1..eba9e6fe 100644 --- a/requirements_prod.txt +++ b/requirements_prod.txt @@ -1,3 +1,2 @@ -gunicorn -psycopg2-binary -zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability +gunicorn>=23.0.0 +psycopg2-binary>=2.9.9 From 36959a11c4cce5d680acace7f019f15e04be41ea Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 29 Oct 2024 11:57:19 -0400 Subject: [PATCH 03/32] Avoid bugs of xclim 0.52 --- finch/processes/wps_base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/finch/processes/wps_base.py b/finch/processes/wps_base.py index 05b18cf1..f469b050 100644 --- a/finch/processes/wps_base.py +++ b/finch/processes/wps_base.py @@ -170,7 +170,7 @@ def convert_xclim_inputs_to_pywps( if ( parse_percentiles and name.endswith("_per") - and attrs["kind"] in [InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE] + and attrs.kind in [InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE] ): var_name = name.split("_")[0] inputs.append( @@ -197,6 +197,10 @@ def convert_xclim_inputs_to_pywps( elif attrs.kind in data_types: choices = list(attrs.choices) if "choices" in attrs else None default = attrs.default if attrs.default != empty_default else None + # FIXME : Error in xclim 0.52 and 0.53 for frost_free_spell_max_length + if default is not None and choices is not None and default not in choices: + LOGGER.error('Indicator %s has incorrect choices for parameter %s : default %s not in %s', parent, name, default, choices) + choices = [default] inputs.append( LiteralInput( name, From b2e625a704ec6c5fde2e7b2af12790a4e00a9baf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:57:59 +0000 Subject: [PATCH 04/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- finch/processes/wps_base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/finch/processes/wps_base.py b/finch/processes/wps_base.py index f469b050..d852411e 100644 --- a/finch/processes/wps_base.py +++ b/finch/processes/wps_base.py @@ -199,7 +199,13 @@ def convert_xclim_inputs_to_pywps( default = attrs.default if attrs.default != empty_default else None # FIXME : Error in xclim 0.52 and 0.53 for frost_free_spell_max_length if default is not None and choices is not None and default not in choices: - LOGGER.error('Indicator %s has incorrect choices for parameter %s : default %s not in %s', parent, name, default, choices) + LOGGER.error( + "Indicator %s has incorrect choices for parameter %s : default %s not in %s", + parent, + name, + default, + choices, + ) choices = [default] inputs.append( LiteralInput( From 07a149d3e3120bd7c9e6ecbaf029d0b73a6e5f86 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 29 Oct 2024 12:32:46 -0400 Subject: [PATCH 05/32] Fix more Parameter is not dict - fix Humidex units --- finch/processes/modules/humidex.yml | 4 ++-- tests/test_wps_xclim_indices.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/finch/processes/modules/humidex.yml b/finch/processes/modules/humidex.yml index 16f1fa04..e4b661c4 100644 --- a/finch/processes/modules/humidex.yml +++ b/finch/processes/modules/humidex.yml @@ -19,10 +19,10 @@ indicators: default: "YS" threshold: default: "30" - units: "" + units: "1" variables: HXmax: - canonical_units: "" + canonical_units: "1" cell_methods: "time: max" description: Daily Maximum Humidex diff --git a/tests/test_wps_xclim_indices.py b/tests/test_wps_xclim_indices.py index 3e1f014d..41b416ba 100644 --- a/tests/test_wps_xclim_indices.py +++ b/tests/test_wps_xclim_indices.py @@ -39,7 +39,7 @@ def test_indicators_processes_discovery(indicator): # Remove args not supported by finch: we remove special kinds, # 50 is "kwargs". 70 is Dataset ('ds') and 99 is "unknown". All normal types are 0-9. parameters = { - k for k, v in indicator.parameters.items() if v["kind"] < 50 or k == "indexer" + k for k, v in indicator.parameters.items() if v.kind < 50 or k == "indexer" } parameters.add("check_missing") parameters.add("missing_options") From f3a6e8e65acebaacad2e675dcfe0d3eeae9f6941 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Tue, 29 Oct 2024 16:35:37 -0400 Subject: [PATCH 06/32] try to use xscen for spatial_avg --- finch/processes/ensemble_utils.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 53994d99..33f682ef 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -10,6 +10,7 @@ from typing import Optional, Union import pandas as pd +import geopandas as gpd import xarray as xr from pandas.api.types import is_numeric_dtype from parse import parse @@ -21,6 +22,8 @@ from xclim.core.calendar import days_since_to_doy, doy_to_days_since, percentile_doy from xclim.core.indicator import Indicator from xclim.indicators.atmos import tg +from xscen.aggregate import spatial_mean +from . import wpsio from .subset import finch_subset_bbox, finch_subset_gridpoint, finch_subset_shape from .utils import ( @@ -350,7 +353,7 @@ def make_file_groups(files_list: list[Path], variables: set) -> list[dict[str, P def make_ensemble( - files: list[Path], percentiles: list[int], average_dims: Optional[tuple[str]] = None + files: list[Path], percentiles: list[int], spatavg: Optional[bool] = False, region: Optional[dict] = None ) -> None: # noqa: D103 ensemble = ensembles.create_ensemble( files, realizations=[file.stem for file in files] @@ -364,8 +367,13 @@ def make_ensemble( if ensemble[v].attrs.get("is_dayofyear", 0) == 1: ensemble[v] = doy_to_days_since(ensemble[v]) - if average_dims is not None: - ensemble = ensemble.mean(dim=average_dims) + if spatavg: + #ensemble = ensemble.mean(dim=average_dims) + if "shape" in region: + method="xesmf" + else: + method = "coslat" + ensemble = spatial_mean(ds=ensemble, method=method, region=region) if percentiles: ensemble_percentiles = ensembles.ensemble_percentiles( @@ -540,13 +548,22 @@ def ensemble_common_handler( ) if single_input_or_none(request.inputs, "average"): + spatavg = True if subset_function == finch_subset_gridpoint: average_dims = ("region",) + region = None + else: - average_dims = ("lat", "lon") + shp = gpd.read_file(Path(request.inputs[wpsio.shape.identifier][0].file)).to_crs("EPSG:4326") + shp['geometry']= shp.make_valid() + region = dict(name='region', method="shape", shape=shp) + #average_dims = ("lat", "lon") else: - average_dims = None - write_log(process, f"Will average over {average_dims}") + #average_dims = None + region = None + spatavg = False + + write_log(process, f"Will average over {region}") base_work_dir = Path(process.workdir) ensembles = [] @@ -625,7 +642,7 @@ def ensemble_common_handler( warnings.filterwarnings("default", category=FutureWarning) warnings.filterwarnings("default", category=UserWarning) - ensemble = make_ensemble(indices_files, ensemble_percentiles, average_dims) + ensemble = make_ensemble(files=indices_files, percentiles=ensemble_percentiles, spatavg=spatavg, region=region) ensemble.attrs["source_datasets"] = "\n".join( [dsinp.url for dsinp in netcdf_inputs] ) From 6a79eaf65010fdfd28bcb0a3ec9d79adbe0420c8 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Wed, 30 Oct 2024 11:41:39 -0400 Subject: [PATCH 07/32] xscen for spatial_avg WIP --- finch/processes/ensemble_utils.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 33f682ef..a5272c6c 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -8,6 +8,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Optional, Union +from datetime import datetime import pandas as pd import geopandas as gpd @@ -361,6 +362,10 @@ def make_ensemble( # make sure we have data starting in 1950 ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950)) + if len(ensemble.lon) == 1 and len(ensemble.lat)==1 and spatavg: + ensemble.attrs['history'] = f"{ensemble.attrs['history']}:[{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] spatial average flag is set to True but will be skipped as dataset contains a single point" + spatavg = False + # If data is in day of year, percentiles won't make sense. # Convert to "days since" (base will be the time coordinate) for v in ensemble.data_vars: @@ -369,11 +374,12 @@ def make_ensemble( if spatavg: #ensemble = ensemble.mean(dim=average_dims) - if "shape" in region: - method="xesmf" + if region is None: + method = "cos-lat" + ensemble = spatial_mean(ds=ensemble, method=method, region=None, spatial_subset=False) else: - method = "coslat" - ensemble = spatial_mean(ds=ensemble, method=method, region=region) + method = "xesmf" + ensemble = spatial_mean(ds=ensemble, method=method, region=region, kwargs={"skipna": True}) if percentiles: ensemble_percentiles = ensembles.ensemble_percentiles( @@ -550,7 +556,7 @@ def ensemble_common_handler( if single_input_or_none(request.inputs, "average"): spatavg = True if subset_function == finch_subset_gridpoint: - average_dims = ("region",) + #average_dims = ("region",) region = None else: @@ -668,7 +674,7 @@ def ensemble_common_handler( ensemble = ensemble.round(prec) prec = 0 df = dataset_to_dataframe(ensemble) - if average_dims is None: + if spatavg is None: dims = ["lat", "lon", "time"] else: dims = ["time"] From ad12da756e99d48a89c9976dadc3f0c3a644e244 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Wed, 30 Oct 2024 11:54:11 -0400 Subject: [PATCH 08/32] xscen for spatial_avg WIP --- finch/processes/ensemble_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index a5272c6c..1db6879f 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -376,10 +376,11 @@ def make_ensemble( #ensemble = ensemble.mean(dim=average_dims) if region is None: method = "cos-lat" - ensemble = spatial_mean(ds=ensemble, method=method, region=None, spatial_subset=False) + spsub = False else: method = "xesmf" - ensemble = spatial_mean(ds=ensemble, method=method, region=region, kwargs={"skipna": True}) + spsub = True + ensemble = spatial_mean(ds=ensemble, method=method, spatial_subset=spsub, region=region, kwargs={"skipna": True}) if percentiles: ensemble_percentiles = ensembles.ensemble_percentiles( From 57ea7429687fd7f567bb0f75a21f9687424c3eaf Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Wed, 30 Oct 2024 15:25:41 -0400 Subject: [PATCH 09/32] add xscen to requirements --- environment.yml | 1 + requirements.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 050c33e5..5dcff055 100644 --- a/environment.yml +++ b/environment.yml @@ -27,3 +27,4 @@ dependencies: - xarray >=2023.11.0 - xclim =0.52.2 # remember to match xclim version in requirements_docs.txt as well - xesmf >=0.8.2 + - xscen==0.11.0 diff --git a/requirements.txt b/requirements.txt index cc5c708f..6e533183 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,3 +20,4 @@ siphon xarray>=2023.11.0 xclim==0.52.2 # remember to match xclim version in environment.yml as well xesmf>=0.6.2 +xscen==0.11.0 From 834dc142bf25c618227c1fb02b90650aaffb9edc Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Wed, 30 Oct 2024 15:57:06 -0400 Subject: [PATCH 10/32] tests for spatial averaging with xscen --- tests/test_wps_ensemble.py | 108 +++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/tests/test_wps_ensemble.py b/tests/test_wps_ensemble.py index 9658174b..c8d03f49 100644 --- a/tests/test_wps_ensemble.py +++ b/tests/test_wps_ensemble.py @@ -71,6 +71,110 @@ def test_ensemble_hxmax_days_above_grid_point(client): assert len(ds.attrs["source_datasets"].split("\n")) == 19 +def test_ensemble_spatial_avg_grid_point(client): + # --- given --- + identifier = "ensemble_grid_point_tg_mean" + inputs = [ + wps_literal_input("lat", "45.5, 46"), + wps_literal_input("lon", "-73.0, -73.3"), + wps_literal_input("scenario", "rcp26"), + wps_literal_input("scenario", "rcp45"), + wps_literal_input("dataset", "test_subset"), + wps_literal_input("freq", "MS"), + wps_literal_input("ensemble_percentiles", "20, 50, 80"), + wps_literal_input("output_format", "netcdf"), + wps_literal_input("output_name", "testens"), + ] + + # --- when --- + outputs = execute_process(client, identifier, inputs) + + # --- then --- + assert len(outputs) == 1 + #assert Path(outputs[0]).stem.startswith("testens_45_500_73_000_ssp245_ssp585") + ds = open_dataset(outputs[0]) + dims = dict(ds.dims) + assert dims == { + "region": 2, + "time": 4, # there are roughly 4 months in the test datasets + "scenario": 2, + } + + ensemble_variables = {k: v for k, v in ds.data_vars.items()} + assert sorted(ensemble_variables) == [ + f"tg_mean_p{p}" for p in (20, 50, 80) + ] + for var in ensemble_variables.values(): + variable_dims = {d: s for d, s in zip(var.dims, var.shape)} + for d, v in {"region": 2, "time": 4, "scenario": 2}.items(): + assert variable_dims[d] == v + + # --- given --- + inputs.append(wps_literal_input("average", "True")) + + # --- when --- + outputs = execute_process(client, identifier, inputs) + + # --- then --- + assert len(outputs) == 1 + + ds = open_dataset(outputs[0]) + dims = dict(ds.dims) + assert dims == { + "time": 4, # there are roughly 4 months in the test datasets + "scenario": 2, + } + + ensemble_variables = {k: v for k, v in ds.data_vars.items()} + assert sorted(ensemble_variables) == [ + f"tg_mean_p{p}" for p in (20, 50, 80) + ] + for var in ensemble_variables.values(): + variable_dims = {d: s for d, s in zip(var.dims, var.shape)} + for d, v in {"time": 4, "scenario": 2}.items(): + assert variable_dims[d] == v + + +def test_ensemble_spatial_avg_poly(client): + # --- given --- + identifier = "ensemble_polygon_tg_mean" + inputs = [ + wps_literal_input("shape", geojson.dumps(poly)), + wps_literal_input("scenario", "rcp26"), + wps_literal_input("scenario", "rcp45"), + wps_literal_input("dataset", "test_subset"), + wps_literal_input("freq", "MS"), + wps_literal_input("ensemble_percentiles", "20, 50, 80"), + wps_literal_input("output_format", "netcdf"), + wps_literal_input("output_name", "testens"), + wps_literal_input("average", "True"), + ] + + # --- when --- + outputs = execute_process(client, identifier, inputs) + + # --- then --- + assert len(outputs) == 1 + + ds = open_dataset(outputs[0]) + dims = dict(ds.dims) + assert dims == { + "time": 4, # there are roughly 4 months in the test datasets + "scenario": 2, + } + + ensemble_variables = {k: v for k, v in ds.data_vars.items()} + assert sorted(ensemble_variables) == [ + f"tg_mean_p{p}" for p in (20, 50, 80) + ] + for var in ensemble_variables.values(): + variable_dims = {d: s for d, s in zip(var.dims, var.shape)} + for d, v in {"time": 4, "scenario": 2}.items(): + assert variable_dims[d] == v + + + + def test_ensemble_heatwave_frequency_grid_point(client): # --- given --- identifier = "ensemble_grid_point_heat_wave_frequency" @@ -551,6 +655,10 @@ def test_ensemble_compute_intermediate_growing_degree_days_grid_point(client): variable_dims = dict(zip(var.dims, var.shape)) assert variable_dims == {"region": 1, "time": 1, "scenario": 1} + inputs.append(wps_literal_input("average", "True")) + + # --- when --- + outputs = execute_process(client, identifier, inputs) def test_ensemble_heatwave_frequency_polygon(client): # --- given --- From 73f506f5125d2bc82733ea70a8ee2c90e1310069 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 31 Oct 2024 09:47:06 -0400 Subject: [PATCH 11/32] small changes --- finch/processes/ensemble_utils.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 1db6879f..31d350d0 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -376,11 +376,9 @@ def make_ensemble( #ensemble = ensemble.mean(dim=average_dims) if region is None: method = "cos-lat" - spsub = False else: method = "xesmf" - spsub = True - ensemble = spatial_mean(ds=ensemble, method=method, spatial_subset=spsub, region=region, kwargs={"skipna": True}) + ensemble = spatial_mean(ds=ensemble, method=method, spatial_subset=False, region=region, kwargs={"skipna": True}) if percentiles: ensemble_percentiles = ensembles.ensemble_percentiles( @@ -557,14 +555,11 @@ def ensemble_common_handler( if single_input_or_none(request.inputs, "average"): spatavg = True if subset_function == finch_subset_gridpoint: - #average_dims = ("region",) region = None - else: shp = gpd.read_file(Path(request.inputs[wpsio.shape.identifier][0].file)).to_crs("EPSG:4326") shp['geometry']= shp.make_valid() region = dict(name='region', method="shape", shape=shp) - #average_dims = ("lat", "lon") else: #average_dims = None region = None From 11245adbb99ffda769dacb5f5f29be53ed39d489 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 31 Oct 2024 09:48:08 -0400 Subject: [PATCH 12/32] test spatial avg no percentiles --- tests/test_wps_ensemble.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_wps_ensemble.py b/tests/test_wps_ensemble.py index c8d03f49..a1e07541 100644 --- a/tests/test_wps_ensemble.py +++ b/tests/test_wps_ensemble.py @@ -173,6 +173,42 @@ def test_ensemble_spatial_avg_poly(client): assert variable_dims[d] == v +def test_ensemble_spatial_avg_poly_noperc(client): + # --- given --- + identifier = "ensemble_polygon_tg_mean" + inputs = [ + wps_literal_input("shape", geojson.dumps(poly)), + wps_literal_input("scenario", "rcp26"), + wps_literal_input("scenario", "rcp45"), + wps_literal_input("dataset", "test_subset"), + wps_literal_input("freq", "MS"), + wps_literal_input("ensemble_percentiles", ""), + wps_literal_input("output_format", "netcdf"), + wps_literal_input("output_name", "testens"), + wps_literal_input("average", "True"), + ] + + # --- when --- + outputs = execute_process(client, identifier, inputs) + + # --- then --- + assert len(outputs) == 1 + + ds = open_dataset(outputs[0]) + dims = dict(ds.dims) + exp_dims = { + "realization": 2, + "time": 4, # there are roughly 4 months in the test datasets + "scenario": 2, + } + assert dims == exp_dims + + ensemble_variables = {k: v for k, v in ds.data_vars.items()} + assert sorted(ensemble_variables) == [f"tg_mean"] + for var in ensemble_variables.values(): + variable_dims = {d: s for d, s in zip(var.dims, var.shape)} + for d, v in exp_dims.items(): + assert variable_dims[d] == v def test_ensemble_heatwave_frequency_grid_point(client): From dce2d094d8c0c8f35a81aeb367755953905a5e89 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:13:48 +0000 Subject: [PATCH 13/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- finch/processes/ensemble_utils.py | 46 +++++++++++++++++++++---------- tests/test_wps_ensemble.py | 15 ++++------ 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 31d350d0..a5a3b6f6 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -6,12 +6,12 @@ from collections.abc import Iterable from copy import deepcopy from dataclasses import dataclass +from datetime import datetime from pathlib import Path from typing import Optional, Union -from datetime import datetime -import pandas as pd import geopandas as gpd +import pandas as pd import xarray as xr from pandas.api.types import is_numeric_dtype from parse import parse @@ -24,8 +24,8 @@ from xclim.core.indicator import Indicator from xclim.indicators.atmos import tg from xscen.aggregate import spatial_mean -from . import wpsio +from . import wpsio from .subset import finch_subset_bbox, finch_subset_gridpoint, finch_subset_shape from .utils import ( DatasetConfiguration, @@ -354,7 +354,10 @@ def make_file_groups(files_list: list[Path], variables: set) -> list[dict[str, P def make_ensemble( - files: list[Path], percentiles: list[int], spatavg: Optional[bool] = False, region: Optional[dict] = None + files: list[Path], + percentiles: list[int], + spatavg: Optional[bool] = False, + region: Optional[dict] = None, ) -> None: # noqa: D103 ensemble = ensembles.create_ensemble( files, realizations=[file.stem for file in files] @@ -362,9 +365,11 @@ def make_ensemble( # make sure we have data starting in 1950 ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950)) - if len(ensemble.lon) == 1 and len(ensemble.lat)==1 and spatavg: - ensemble.attrs['history'] = f"{ensemble.attrs['history']}:[{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] spatial average flag is set to True but will be skipped as dataset contains a single point" - spatavg = False + if len(ensemble.lon) == 1 and len(ensemble.lat) == 1 and spatavg: + ensemble.attrs["history"] = ( + f"{ensemble.attrs['history']}:[{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] spatial average flag is set to True but will be skipped as dataset contains a single point" + ) + spatavg = False # If data is in day of year, percentiles won't make sense. # Convert to "days since" (base will be the time coordinate) @@ -373,12 +378,18 @@ def make_ensemble( ensemble[v] = doy_to_days_since(ensemble[v]) if spatavg: - #ensemble = ensemble.mean(dim=average_dims) + # ensemble = ensemble.mean(dim=average_dims) if region is None: method = "cos-lat" else: method = "xesmf" - ensemble = spatial_mean(ds=ensemble, method=method, spatial_subset=False, region=region, kwargs={"skipna": True}) + ensemble = spatial_mean( + ds=ensemble, + method=method, + spatial_subset=False, + region=region, + kwargs={"skipna": True}, + ) if percentiles: ensemble_percentiles = ensembles.ensemble_percentiles( @@ -557,11 +568,13 @@ def ensemble_common_handler( if subset_function == finch_subset_gridpoint: region = None else: - shp = gpd.read_file(Path(request.inputs[wpsio.shape.identifier][0].file)).to_crs("EPSG:4326") - shp['geometry']= shp.make_valid() - region = dict(name='region', method="shape", shape=shp) + shp = gpd.read_file( + Path(request.inputs[wpsio.shape.identifier][0].file) + ).to_crs("EPSG:4326") + shp["geometry"] = shp.make_valid() + region = dict(name="region", method="shape", shape=shp) else: - #average_dims = None + # average_dims = None region = None spatavg = False @@ -644,7 +657,12 @@ def ensemble_common_handler( warnings.filterwarnings("default", category=FutureWarning) warnings.filterwarnings("default", category=UserWarning) - ensemble = make_ensemble(files=indices_files, percentiles=ensemble_percentiles, spatavg=spatavg, region=region) + ensemble = make_ensemble( + files=indices_files, + percentiles=ensemble_percentiles, + spatavg=spatavg, + region=region, + ) ensemble.attrs["source_datasets"] = "\n".join( [dsinp.url for dsinp in netcdf_inputs] ) diff --git a/tests/test_wps_ensemble.py b/tests/test_wps_ensemble.py index a1e07541..35ae7d63 100644 --- a/tests/test_wps_ensemble.py +++ b/tests/test_wps_ensemble.py @@ -91,7 +91,7 @@ def test_ensemble_spatial_avg_grid_point(client): # --- then --- assert len(outputs) == 1 - #assert Path(outputs[0]).stem.startswith("testens_45_500_73_000_ssp245_ssp585") + # assert Path(outputs[0]).stem.startswith("testens_45_500_73_000_ssp245_ssp585") ds = open_dataset(outputs[0]) dims = dict(ds.dims) assert dims == { @@ -101,9 +101,7 @@ def test_ensemble_spatial_avg_grid_point(client): } ensemble_variables = {k: v for k, v in ds.data_vars.items()} - assert sorted(ensemble_variables) == [ - f"tg_mean_p{p}" for p in (20, 50, 80) - ] + assert sorted(ensemble_variables) == [f"tg_mean_p{p}" for p in (20, 50, 80)] for var in ensemble_variables.values(): variable_dims = {d: s for d, s in zip(var.dims, var.shape)} for d, v in {"region": 2, "time": 4, "scenario": 2}.items(): @@ -126,9 +124,7 @@ def test_ensemble_spatial_avg_grid_point(client): } ensemble_variables = {k: v for k, v in ds.data_vars.items()} - assert sorted(ensemble_variables) == [ - f"tg_mean_p{p}" for p in (20, 50, 80) - ] + assert sorted(ensemble_variables) == [f"tg_mean_p{p}" for p in (20, 50, 80)] for var in ensemble_variables.values(): variable_dims = {d: s for d, s in zip(var.dims, var.shape)} for d, v in {"time": 4, "scenario": 2}.items(): @@ -164,9 +160,7 @@ def test_ensemble_spatial_avg_poly(client): } ensemble_variables = {k: v for k, v in ds.data_vars.items()} - assert sorted(ensemble_variables) == [ - f"tg_mean_p{p}" for p in (20, 50, 80) - ] + assert sorted(ensemble_variables) == [f"tg_mean_p{p}" for p in (20, 50, 80)] for var in ensemble_variables.values(): variable_dims = {d: s for d, s in zip(var.dims, var.shape)} for d, v in {"time": 4, "scenario": 2}.items(): @@ -696,6 +690,7 @@ def test_ensemble_compute_intermediate_growing_degree_days_grid_point(client): # --- when --- outputs = execute_process(client, identifier, inputs) + def test_ensemble_heatwave_frequency_polygon(client): # --- given --- identifier = "ensemble_polygon_heat_wave_frequency" From d211b62d95aa3b9b7b03dbacf82ca600cfafbb62 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 31 Oct 2024 15:00:33 -0400 Subject: [PATCH 14/32] black failure --- finch/processes/ensemble_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 31d350d0..a7ebfa99 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -363,7 +363,9 @@ def make_ensemble( ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950)) if len(ensemble.lon) == 1 and len(ensemble.lat)==1 and spatavg: - ensemble.attrs['history'] = f"{ensemble.attrs['history']}:[{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] spatial average flag is set to True but will be skipped as dataset contains a single point" + ensemble.attrs['history'] = (f"{ensemble.attrs['history']}:[{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] " + f"spatial average flag is set to True but will be skipped as dataset contains a " + f"single point") spatavg = False # If data is in day of year, percentiles won't make sense. @@ -378,7 +380,8 @@ def make_ensemble( method = "cos-lat" else: method = "xesmf" - ensemble = spatial_mean(ds=ensemble, method=method, spatial_subset=False, region=region, kwargs={"skipna": True}) + ensemble = spatial_mean(ds=ensemble, method=method, spatial_subset=False, + region=region, kwargs={"skipna": True}) if percentiles: ensemble_percentiles = ensembles.ensemble_percentiles( From dcc340e1364beaabf7a59863085de825813a3a58 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 31 Oct 2024 15:04:22 -0400 Subject: [PATCH 15/32] black failure --- finch/processes/ensemble_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 76fb3771..0ddd1dd9 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -366,7 +366,9 @@ def make_ensemble( ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950)) if len(ensemble.lon) == 1 and len(ensemble.lat)==1 and spatavg: - ensemble.attrs['history'] = f"{ensemble.attrs['history']}:[{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}] spatial average flag is set to True but will be skipped as dataset contains a single point" + ensemble.attrs['history'] = (f"{ensemble.attrs['history']}:[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"spatial average flag is set to True but will be skipped as dataset contains a " + f"single point") spatavg = False # If data is in day of year, percentiles won't make sense. From 00339ccd3e3dc4f732727c77f3ea2269e11563c9 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 31 Oct 2024 15:06:51 -0400 Subject: [PATCH 16/32] unpin xscen for now --- environment.yml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 5dcff055..635705e0 100644 --- a/environment.yml +++ b/environment.yml @@ -27,4 +27,4 @@ dependencies: - xarray >=2023.11.0 - xclim =0.52.2 # remember to match xclim version in requirements_docs.txt as well - xesmf >=0.8.2 - - xscen==0.11.0 + - xscen diff --git a/requirements.txt b/requirements.txt index 6e533183..cdd74b93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,4 @@ siphon xarray>=2023.11.0 xclim==0.52.2 # remember to match xclim version in environment.yml as well xesmf>=0.6.2 -xscen==0.11.0 +xscen From 211fd6a588e7fafb445aa3e82a47425b806c24ae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 19:07:28 +0000 Subject: [PATCH 17/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- finch/processes/ensemble_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 0ddd1dd9..823420a1 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -365,11 +365,13 @@ def make_ensemble( # make sure we have data starting in 1950 ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950)) - if len(ensemble.lon) == 1 and len(ensemble.lat)==1 and spatavg: - ensemble.attrs['history'] = (f"{ensemble.attrs['history']}:[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " - f"spatial average flag is set to True but will be skipped as dataset contains a " - f"single point") - spatavg = False + if len(ensemble.lon) == 1 and len(ensemble.lat) == 1 and spatavg: + ensemble.attrs["history"] = ( + f"{ensemble.attrs['history']}:[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"spatial average flag is set to True but will be skipped as dataset contains a " + f"single point" + ) + spatavg = False # If data is in day of year, percentiles won't make sense. # Convert to "days since" (base will be the time coordinate) From 90ec6412b22881e8c0b32f9ddcbcb89fd20ed912 Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 5 Dec 2024 11:47:31 -0500 Subject: [PATCH 18/32] fix bbox, history, csv order, skip geomet processes --- finch/processes/__init__.py | 2 +- finch/processes/ensemble_utils.py | 13 ++++++++++--- tests/test_wps_ensemble.py | 8 ++++---- tests/test_wps_geoseries_to_netcdf.py | 4 +++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/finch/processes/__init__.py b/finch/processes/__init__.py index 90346cf8..1a722746 100644 --- a/finch/processes/__init__.py +++ b/finch/processes/__init__.py @@ -143,7 +143,7 @@ def get_processes(): SubsetPolygonProcess(), AveragePolygonProcess(), HourlyToDailyProcess(), - GeoseriesToNetcdfProcess(), + # GeoseriesToNetcdfProcess(), ] return processes diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index 0ddd1dd9..2fce837e 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -365,10 +365,10 @@ def make_ensemble( # make sure we have data starting in 1950 ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950)) - if len(ensemble.lon) == 1 and len(ensemble.lat)==1 and spatavg: - ensemble.attrs['history'] = (f"{ensemble.attrs['history']}:[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + if ensemble.lon.size == 1 and ensemble.lat.size == 1 and spatavg: + ensemble.attrs['history'] = (f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " f"spatial average flag is set to True but will be skipped as dataset contains a " - f"single point") + f"single point\n{ensemble.attrs.get('history', '')}") spatavg = False # If data is in day of year, percentiles won't make sense. @@ -567,6 +567,13 @@ def ensemble_common_handler( spatavg = True if subset_function == finch_subset_gridpoint: region = None + elif subset_function == finch_subset_bbox: + lon0 = single_input_or_none(request.inputs, wpsio.lon0.identifier) + lat0 = single_input_or_none(request.inputs, wpsio.lat0.identifier) + lon1 = single_input_or_none(request.inputs, wpsio.lon1.identifier) + lat1 = single_input_or_none(request.inputs, wpsio.lat1.identifier) + bbox = dict(lat_bnds=[lat0, lat1], lon_bnds=[lon0, lon1]) + region = dict(name="region", method="bbox", **bbox) else: shp = gpd.read_file( Path(request.inputs[wpsio.shape.identifier][0].file) diff --git a/tests/test_wps_ensemble.py b/tests/test_wps_ensemble.py index 35ae7d63..55f4434d 100644 --- a/tests/test_wps_ensemble.py +++ b/tests/test_wps_ensemble.py @@ -271,7 +271,7 @@ def test_ensemble_tx_mean_grid_point_no_perc_csv(client): data_filename = [n for n in zf.namelist() if "metadata" not in n] csv = zf.read(data_filename[0]).decode() lines = csv.split("\n") - assert lines[0].startswith("lat,lon,time,scenario") + assert lines[0].startswith("time,lat,lon,scenario") assert len(lines[0].split(",")) == 6 assert all([line.startswith("tx_mean:") for line in lines[0].split(",")[-2:]]) n_data_rows = len(lines) - 2 @@ -489,7 +489,7 @@ def test_ensemble_heatwave_frequency_grid_point_csv(client): data_filename = [n for n in zf.namelist() if "metadata" not in n] csv = zf.read(data_filename[0]).decode() lines = csv.split("\n") - assert lines[0].startswith("lat,lon,time,scenario") + assert lines[0].startswith("time,lat,lon,scenario") n_data_rows = len(lines) - 2 assert n_data_rows == 4 # time=4 (last month is NaN, but kept in CSV) @@ -522,7 +522,7 @@ def test_ensemble_heatwave_frequency_bbox_csv(client): data_filename = [n for n in zf.namelist() if "metadata" not in n] csv = zf.read(data_filename[0]).decode() lines = csv.split("\n") - assert lines[0].startswith("lat,lon,time") + assert lines[0].startswith("time,lat,lon") n_data_rows = len(lines) - 2 assert ( n_data_rows == 2 * 2 * 4 @@ -774,7 +774,7 @@ def test_ensemble_heatwave_frequency_polygon_csv(client): data_filename = [n for n in zf.namelist() if "metadata" not in n] csv = zf.read(data_filename[0]).decode() lines = csv.split("\n") - assert lines[0].startswith("lat,lon,time") + assert lines[0].startswith("time,lat,lon") n_data_rows = len(lines) - 2 # header + ending line # lat: 11 lon: 11, time=4 (last month is NaN, but kept in CSV) assert n_data_rows == 11 * 11 * 4 diff --git a/tests/test_wps_geoseries_to_netcdf.py b/tests/test_wps_geoseries_to_netcdf.py index b4793705..308eb595 100644 --- a/tests/test_wps_geoseries_to_netcdf.py +++ b/tests/test_wps_geoseries_to_netcdf.py @@ -1,5 +1,5 @@ from pathlib import Path - +import pytest import numpy as np import xarray as xr @@ -10,6 +10,7 @@ def geomet_geojson(): return (Path(__file__).parent / "data" / "geomet.geojson").as_posix() +@pytest.skip('Broken in cf-xarray') def test_wps_geoseries_to_netcdf(client): identifier = "geoseries_to_netcdf" inputs = [ @@ -24,6 +25,7 @@ def test_wps_geoseries_to_netcdf(client): print(ds.attrs) +@pytest.skip('Broken in cf-xarray') def test_wps_geoseries_to_netcdf_feat_squeeze(client): identifier = "geoseries_to_netcdf" inputs = [ From 5051539475c46d2ba8b8026ea77419ee3b15b8ed Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 5 Dec 2024 16:17:02 -0500 Subject: [PATCH 19/32] fix bbox, history, csv order, skip geomet processes --- finch/default.cfg | 3 +-- finch/processes/modules/streamflow.yml | 6 +++--- tests/test_wps_caps.py | 2 +- tests/test_wps_geoseries_to_netcdf.py | 4 ++-- tests/test_wps_xclim_indices.py | 1 + 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/finch/default.cfg b/finch/default.cfg index a355a2a6..ecb2486b 100644 --- a/finch/default.cfg +++ b/finch/default.cfg @@ -19,8 +19,7 @@ language=en-US,fr subset_threads = 1 datasets_config = datasets.yml default_dataset = candcs-u6 -xclim_modules = processes/modules/humidex -; processes/modules/streamflow ; Not yet reimplemented +xclim_modules = processes/modules/humidex,processes/modules/streamflow [finch:metadata] # All fields here are added as string attributes of computed indices. diff --git a/finch/processes/modules/streamflow.yml b/finch/processes/modules/streamflow.yml index fe96396b..b57a729e 100644 --- a/finch/processes/modules/streamflow.yml +++ b/finch/processes/modules/streamflow.yml @@ -3,7 +3,7 @@ doc: realm: land indicators: discharge_stats: - base: xclim.core.indicator.stats + base: stats cf_attrs: long_name: Daily flow statistics description: "{freq} {op} of daily flow ({indexer})." @@ -12,7 +12,7 @@ indicators: input: da: discharge freq_analysis: - base: xclim.core.indicator.return_level + base: return_level cf_attrs: long_name: N-year return level discharge description: "Streamflow frequency analysis for the {mode} {indexer} {window}-day flow estimated using the {dist} distribution." @@ -21,7 +21,7 @@ indicators: input: da: discharge discharge_distribution_fit: - base: xclim.core.indicator.fit + base: fit cf_attrs: var_name: params input: diff --git a/tests/test_wps_caps.py b/tests/test_wps_caps.py index 50b0c123..006df0d8 100644 --- a/tests/test_wps_caps.py +++ b/tests/test_wps_caps.py @@ -52,7 +52,7 @@ def mock_config_get(*args, **kwargs): indicators.extend(mod_dict[mod]["indicators"]) subset_processes_count = 4 sdba_processes_count = 1 - others = 2 + others = 1 assert len( indicators ) + others + subset_processes_count + sdba_processes_count == len(names) diff --git a/tests/test_wps_geoseries_to_netcdf.py b/tests/test_wps_geoseries_to_netcdf.py index 308eb595..62b1ed84 100644 --- a/tests/test_wps_geoseries_to_netcdf.py +++ b/tests/test_wps_geoseries_to_netcdf.py @@ -10,7 +10,7 @@ def geomet_geojson(): return (Path(__file__).parent / "data" / "geomet.geojson").as_posix() -@pytest.skip('Broken in cf-xarray') +@pytest.mark.skip('Broken in cf-xarray') def test_wps_geoseries_to_netcdf(client): identifier = "geoseries_to_netcdf" inputs = [ @@ -25,7 +25,7 @@ def test_wps_geoseries_to_netcdf(client): print(ds.attrs) -@pytest.skip('Broken in cf-xarray') +@pytest.mark.skip('Broken in cf-xarray') def test_wps_geoseries_to_netcdf_feat_squeeze(client): identifier = "geoseries_to_netcdf" inputs = [ diff --git a/tests/test_wps_xclim_indices.py b/tests/test_wps_xclim_indices.py index 41b416ba..504d6726 100644 --- a/tests/test_wps_xclim_indices.py +++ b/tests/test_wps_xclim_indices.py @@ -259,6 +259,7 @@ def test_nan(self, client, tmp_path): inputs = [ wps_input_file("discharge", tmp_path / "q.nc"), wps_literal_input("dist", "norm"), + wps_literal_input("check_missing", "skip") ] outputs = execute_process(client, self.identifier, inputs) ds = xr.open_dataset(outputs[0]) From 3a6fec76a8e30c30e92a890e433fb45b7308d925 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 21:21:04 +0000 Subject: [PATCH 20/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- finch/processes/ensemble_utils.py | 10 ++++++---- tests/test_wps_geoseries_to_netcdf.py | 7 ++++--- tests/test_wps_xclim_indices.py | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index a2ad3d24..599539bb 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -366,10 +366,12 @@ def make_ensemble( ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950)) if ensemble.lon.size == 1 and ensemble.lat.size == 1 and spatavg: - ensemble.attrs['history'] = (f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " - f"spatial average flag is set to True but will be skipped as dataset contains a " - f"single point\n{ensemble.attrs.get('history', '')}") - spatavg = False + ensemble.attrs["history"] = ( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"spatial average flag is set to True but will be skipped as dataset contains a " + f"single point\n{ensemble.attrs.get('history', '')}" + ) + spatavg = False # If data is in day of year, percentiles won't make sense. # Convert to "days since" (base will be the time coordinate) diff --git a/tests/test_wps_geoseries_to_netcdf.py b/tests/test_wps_geoseries_to_netcdf.py index 62b1ed84..0217e0b8 100644 --- a/tests/test_wps_geoseries_to_netcdf.py +++ b/tests/test_wps_geoseries_to_netcdf.py @@ -1,6 +1,7 @@ from pathlib import Path -import pytest + import numpy as np +import pytest import xarray as xr from _utils import execute_process, wps_input_file, wps_literal_input @@ -10,7 +11,7 @@ def geomet_geojson(): return (Path(__file__).parent / "data" / "geomet.geojson").as_posix() -@pytest.mark.skip('Broken in cf-xarray') +@pytest.mark.skip("Broken in cf-xarray") def test_wps_geoseries_to_netcdf(client): identifier = "geoseries_to_netcdf" inputs = [ @@ -25,7 +26,7 @@ def test_wps_geoseries_to_netcdf(client): print(ds.attrs) -@pytest.mark.skip('Broken in cf-xarray') +@pytest.mark.skip("Broken in cf-xarray") def test_wps_geoseries_to_netcdf_feat_squeeze(client): identifier = "geoseries_to_netcdf" inputs = [ diff --git a/tests/test_wps_xclim_indices.py b/tests/test_wps_xclim_indices.py index 504d6726..688f47da 100644 --- a/tests/test_wps_xclim_indices.py +++ b/tests/test_wps_xclim_indices.py @@ -259,7 +259,7 @@ def test_nan(self, client, tmp_path): inputs = [ wps_input_file("discharge", tmp_path / "q.nc"), wps_literal_input("dist", "norm"), - wps_literal_input("check_missing", "skip") + wps_literal_input("check_missing", "skip"), ] outputs = execute_process(client, self.identifier, inputs) ds = xr.open_dataset(outputs[0]) From a3da943e71b6d8be25042f9771bfbc5314a48fff Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Fri, 6 Dec 2024 10:01:10 -0500 Subject: [PATCH 21/32] re mplement lost bbox fix --- finch/processes/ensemble_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index a2ad3d24..2fce837e 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -567,6 +567,13 @@ def ensemble_common_handler( spatavg = True if subset_function == finch_subset_gridpoint: region = None + elif subset_function == finch_subset_bbox: + lon0 = single_input_or_none(request.inputs, wpsio.lon0.identifier) + lat0 = single_input_or_none(request.inputs, wpsio.lat0.identifier) + lon1 = single_input_or_none(request.inputs, wpsio.lon1.identifier) + lat1 = single_input_or_none(request.inputs, wpsio.lat1.identifier) + bbox = dict(lat_bnds=[lat0, lat1], lon_bnds=[lon0, lon1]) + region = dict(name="region", method="bbox", **bbox) else: shp = gpd.read_file( Path(request.inputs[wpsio.shape.identifier][0].file) From 1debaa2f7fd35cc8a9db6fde2a15cd33f428780e Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 9 Dec 2024 18:00:44 -0500 Subject: [PATCH 22/32] Fix is_opendap - pin xesmf - use pytest for tmp paths --- environment.yml | 2 +- finch/processes/utils.py | 5 +++- requirements.txt | 2 +- tests/conftest.py | 59 ++++++++++++++++------------------------ 4 files changed, 30 insertions(+), 38 deletions(-) diff --git a/environment.yml b/environment.yml index 635705e0..4800a61b 100644 --- a/environment.yml +++ b/environment.yml @@ -26,5 +26,5 @@ dependencies: - siphon - xarray >=2023.11.0 - xclim =0.52.2 # remember to match xclim version in requirements_docs.txt as well - - xesmf >=0.8.2 + - xesmf >=0.8.2,!=0.8.8 - xscen diff --git a/finch/processes/utils.py b/finch/processes/utils.py index e6cab989..64d9bb9f 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -12,6 +12,7 @@ from pathlib import Path from typing import Callable, Deque, Optional, Union from urllib.error import URLError +from urllib.parse import urlparse, urlunparse import cftime import numpy as np @@ -511,7 +512,9 @@ def is_opendap_url(url): Even then, some OpenDAP servers seem to not include the specified header... So we need to let the netCDF4 library actually open the file. """ - req = urllib.request.Request(url, method="HEAD") + parts = urlparse(url) + meta_url = urlunparse([parts[0], parts[1], parts[2] + '.dds', None, None, None]) + req = urllib.request.Request(meta_url, method="HEAD") try: with urllib.request.urlopen(req, timeout=5) as response: diff --git a/requirements.txt b/requirements.txt index cdd74b93..1da39e18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,5 +19,5 @@ setuptools>=71.0.0 siphon xarray>=2023.11.0 xclim==0.52.2 # remember to match xclim version in environment.yml as well -xesmf>=0.6.2 +xesmf>=0.6.2,!=0.8.8 xscen diff --git a/tests/conftest.py b/tests/conftest.py index ce7a3a15..3efe68af 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,4 @@ import collections -import tempfile from pathlib import Path from shutil import rmtree from typing import Optional, Union @@ -20,18 +19,6 @@ import finch.wsgi from _common import CFG_FILE, client_for -TEMP_DIR = Path(__file__).parent / "tmp" - - -@pytest.fixture(scope="session", autouse=True) -def setup_temp_data(request): - TEMP_DIR.mkdir(exist_ok=True) - - def _cleanup_temp(): - rmtree(TEMP_DIR, ignore_errors=True) - - request.addfinalizer(_cleanup_temp) - def _create_test_dataset( variable: str, @@ -91,16 +78,16 @@ def _create_test_dataset( return obj -def _create_and_write_dataset(variable, **kwds) -> Path: +def _create_and_write_dataset(variable, folder, **kwds) -> Path: """Write a DataSet to disk and return its path""" ds = _create_test_dataset(variable, **kwds) - return _write_dataset(variable, ds) + return _write_dataset(variable, ds, folder) -def _write_dataset(variable, ds) -> Path: - _, filename = tempfile.mkstemp(f"finch_test_data_{variable}.nc", dir=TEMP_DIR) +def _write_dataset(variable, ds, folder) -> Path: + filename = folder / f"finch_test_data_{variable}.nc" ds.to_netcdf(filename) - return Path(filename) + return filename variable_descriptions = { @@ -139,16 +126,17 @@ def _write_dataset(variable, ds) -> Path: @pytest.fixture(scope="session") -def netcdf_datasets(request) -> dict[str, Path]: +def netcdf_datasets(request, tmp_path_factory) -> dict[str, Path]: """Returns a Dict mapping a variable name to a corresponding netcdf path""" datasets = dict() + tmpdir = tmp_path_factory.mktemp('nc_datasets') for variable_name, description in variable_descriptions.items(): - filename = _create_and_write_dataset(variable_name, **description, seed=1) + filename = _create_and_write_dataset(variable_name, folder=tmpdir, **description, seed=1) datasets[variable_name] = filename # With missing values filename = _create_and_write_dataset( - variable_name, **description, seed=1, missing=True + variable_name, folder=tmpdir, **description, seed=1, missing=True ) datasets[variable_name + "_missing"] = filename @@ -156,24 +144,24 @@ def netcdf_datasets(request) -> dict[str, Path]: tas = xr.open_dataset(datasets["tas"]).tas tn10 = percentile_doy(tasmin, per=0.1).to_dataset(name="tn10") - datasets["tn10"] = _write_dataset("tn10", tn10) + datasets["tn10"] = _write_dataset("tn10", tn10, tmpdir) t10 = percentile_doy(tas, per=0.1).to_dataset(name="t10") - datasets["t10"] = _write_dataset("t10", t10) + datasets["t10"] = _write_dataset("t10", t10, tmpdir) t90 = percentile_doy(tas, per=0.9).to_dataset(name="t90") - datasets["t90"] = _write_dataset("t90", t90) + datasets["t90"] = _write_dataset("t90", t90, tmpdir) # Create file with two variables keys = ["pr", "discharge"] ds = xr.merge( [_create_test_dataset(k, **variable_descriptions[k], seed=1) for k in keys] ) - datasets["pr_discharge"] = _write_dataset("pr_discharge", ds) + datasets["pr_discharge"] = _write_dataset("pr_discharge", ds, tmpdir) return datasets @pytest.fixture(scope="session") -def netcdf_sdba_ds(request) -> tuple[dict[str, Path], DataArray]: +def netcdf_sdba_ds(request, tmp_path_factory) -> tuple[dict[str, Path], DataArray]: """Return datasets useful to test sdba.""" out = {} u = np.random.rand(10000) @@ -187,22 +175,23 @@ def netcdf_sdba_ds(request) -> tuple[dict[str, Path], DataArray]: y = yd.ppf(u) # Test train - out["qdm_tas_hist"] = _write_dataset("qdm_tas_hist", series(x, "tas")) - out["qdm_tas_ref"] = _write_dataset("qdm_tas_ref", series(y, "tas")) - out["qdm_pr_hist"] = _write_dataset("qdm_pr_hist", series(x, "pr")) - out["qdm_pr_ref"] = _write_dataset("qdm_pr_ref", series(y, "pr")) + tmpdir = tmp_path_factory.mktemp('nc_sdba_datasets') + out["qdm_tas_hist"] = _write_dataset("qdm_tas_hist", series(x, "tas"), tmpdir) + out["qdm_tas_ref"] = _write_dataset("qdm_tas_ref", series(y, "tas"), tmpdir) + out["qdm_pr_hist"] = _write_dataset("qdm_pr_hist", series(x, "pr"), tmpdir) + out["qdm_pr_ref"] = _write_dataset("qdm_pr_ref", series(y, "pr"), tmpdir) return out, series(u, "u") @pytest.fixture(scope="module") -def client(): +def client(tmp_path_factory): service = finch.wsgi.create_app(cfgfiles=CFG_FILE) # overwrite output path from defaults.cfg - outputpath = tempfile.gettempdir() + outputpath = tmp_path_factory.mktemp('wps_outputs') configuration.CONFIG.set("server", "outputurl", f"file://{outputpath}") - configuration.CONFIG.set("server", "outputpath", outputpath) + configuration.CONFIG.set("server", "outputpath", str(outputpath)) return client_for(service) @@ -242,8 +231,8 @@ def series(values: np.ndarray, name: str, start: str = "2000-01-01"): @pytest.fixture -def hourly_dataset(): # noqa: F811 +def hourly_dataset(tmp_path_factory): # noqa: F811 """Ten days of precip with first hour missing.""" a = np.arange(10 * 24.0) a[0] = np.nan - return _write_dataset("pr_hr", timeseries(values=a, variable="pr", freq="H")) + return _write_dataset("pr_hr", timeseries(values=a, variable="pr", freq="H"), tmp_path_factory.mktemp('hourly_ds')) From ccd61aa383dea69eb5057694b0c6fad3c00ddbc8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 23:01:03 +0000 Subject: [PATCH 23/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- finch/processes/utils.py | 2 +- tests/conftest.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/finch/processes/utils.py b/finch/processes/utils.py index 64d9bb9f..78d952e8 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -513,7 +513,7 @@ def is_opendap_url(url): So we need to let the netCDF4 library actually open the file. """ parts = urlparse(url) - meta_url = urlunparse([parts[0], parts[1], parts[2] + '.dds', None, None, None]) + meta_url = urlunparse([parts[0], parts[1], parts[2] + ".dds", None, None, None]) req = urllib.request.Request(meta_url, method="HEAD") try: diff --git a/tests/conftest.py b/tests/conftest.py index 3efe68af..e2c69a10 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -129,9 +129,11 @@ def _write_dataset(variable, ds, folder) -> Path: def netcdf_datasets(request, tmp_path_factory) -> dict[str, Path]: """Returns a Dict mapping a variable name to a corresponding netcdf path""" datasets = dict() - tmpdir = tmp_path_factory.mktemp('nc_datasets') + tmpdir = tmp_path_factory.mktemp("nc_datasets") for variable_name, description in variable_descriptions.items(): - filename = _create_and_write_dataset(variable_name, folder=tmpdir, **description, seed=1) + filename = _create_and_write_dataset( + variable_name, folder=tmpdir, **description, seed=1 + ) datasets[variable_name] = filename # With missing values @@ -175,7 +177,7 @@ def netcdf_sdba_ds(request, tmp_path_factory) -> tuple[dict[str, Path], DataArra y = yd.ppf(u) # Test train - tmpdir = tmp_path_factory.mktemp('nc_sdba_datasets') + tmpdir = tmp_path_factory.mktemp("nc_sdba_datasets") out["qdm_tas_hist"] = _write_dataset("qdm_tas_hist", series(x, "tas"), tmpdir) out["qdm_tas_ref"] = _write_dataset("qdm_tas_ref", series(y, "tas"), tmpdir) out["qdm_pr_hist"] = _write_dataset("qdm_pr_hist", series(x, "pr"), tmpdir) @@ -189,7 +191,7 @@ def client(tmp_path_factory): service = finch.wsgi.create_app(cfgfiles=CFG_FILE) # overwrite output path from defaults.cfg - outputpath = tmp_path_factory.mktemp('wps_outputs') + outputpath = tmp_path_factory.mktemp("wps_outputs") configuration.CONFIG.set("server", "outputurl", f"file://{outputpath}") configuration.CONFIG.set("server", "outputpath", str(outputpath)) @@ -235,4 +237,8 @@ def hourly_dataset(tmp_path_factory): # noqa: F811 """Ten days of precip with first hour missing.""" a = np.arange(10 * 24.0) a[0] = np.nan - return _write_dataset("pr_hr", timeseries(values=a, variable="pr", freq="H"), tmp_path_factory.mktemp('hourly_ds')) + return _write_dataset( + "pr_hr", + timeseries(values=a, variable="pr", freq="H"), + tmp_path_factory.mktemp("hourly_ds"), + ) From 659c14998113dc2cd1b401e674f99c1ddca37efa Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 9 Dec 2024 18:06:35 -0500 Subject: [PATCH 24/32] fix f-string pep8 --- finch/processes/utils.py | 2 +- tests/conftest.py | 16 +++++++++++----- tests/test_wps_ensemble.py | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/finch/processes/utils.py b/finch/processes/utils.py index 64d9bb9f..78d952e8 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -513,7 +513,7 @@ def is_opendap_url(url): So we need to let the netCDF4 library actually open the file. """ parts = urlparse(url) - meta_url = urlunparse([parts[0], parts[1], parts[2] + '.dds', None, None, None]) + meta_url = urlunparse([parts[0], parts[1], parts[2] + ".dds", None, None, None]) req = urllib.request.Request(meta_url, method="HEAD") try: diff --git a/tests/conftest.py b/tests/conftest.py index 3efe68af..e2c69a10 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -129,9 +129,11 @@ def _write_dataset(variable, ds, folder) -> Path: def netcdf_datasets(request, tmp_path_factory) -> dict[str, Path]: """Returns a Dict mapping a variable name to a corresponding netcdf path""" datasets = dict() - tmpdir = tmp_path_factory.mktemp('nc_datasets') + tmpdir = tmp_path_factory.mktemp("nc_datasets") for variable_name, description in variable_descriptions.items(): - filename = _create_and_write_dataset(variable_name, folder=tmpdir, **description, seed=1) + filename = _create_and_write_dataset( + variable_name, folder=tmpdir, **description, seed=1 + ) datasets[variable_name] = filename # With missing values @@ -175,7 +177,7 @@ def netcdf_sdba_ds(request, tmp_path_factory) -> tuple[dict[str, Path], DataArra y = yd.ppf(u) # Test train - tmpdir = tmp_path_factory.mktemp('nc_sdba_datasets') + tmpdir = tmp_path_factory.mktemp("nc_sdba_datasets") out["qdm_tas_hist"] = _write_dataset("qdm_tas_hist", series(x, "tas"), tmpdir) out["qdm_tas_ref"] = _write_dataset("qdm_tas_ref", series(y, "tas"), tmpdir) out["qdm_pr_hist"] = _write_dataset("qdm_pr_hist", series(x, "pr"), tmpdir) @@ -189,7 +191,7 @@ def client(tmp_path_factory): service = finch.wsgi.create_app(cfgfiles=CFG_FILE) # overwrite output path from defaults.cfg - outputpath = tmp_path_factory.mktemp('wps_outputs') + outputpath = tmp_path_factory.mktemp("wps_outputs") configuration.CONFIG.set("server", "outputurl", f"file://{outputpath}") configuration.CONFIG.set("server", "outputpath", str(outputpath)) @@ -235,4 +237,8 @@ def hourly_dataset(tmp_path_factory): # noqa: F811 """Ten days of precip with first hour missing.""" a = np.arange(10 * 24.0) a[0] = np.nan - return _write_dataset("pr_hr", timeseries(values=a, variable="pr", freq="H"), tmp_path_factory.mktemp('hourly_ds')) + return _write_dataset( + "pr_hr", + timeseries(values=a, variable="pr", freq="H"), + tmp_path_factory.mktemp("hourly_ds"), + ) diff --git a/tests/test_wps_ensemble.py b/tests/test_wps_ensemble.py index 55f4434d..01fc4c95 100644 --- a/tests/test_wps_ensemble.py +++ b/tests/test_wps_ensemble.py @@ -198,7 +198,7 @@ def test_ensemble_spatial_avg_poly_noperc(client): assert dims == exp_dims ensemble_variables = {k: v for k, v in ds.data_vars.items()} - assert sorted(ensemble_variables) == [f"tg_mean"] + assert sorted(ensemble_variables) == ["tg_mean"] for var in ensemble_variables.values(): variable_dims = {d: s for d, s in zip(var.dims, var.shape)} for d, v in exp_dims.items(): From 338b37b92ab7096fe1008bc399c25d735829aab5 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 9 Dec 2024 18:18:22 -0500 Subject: [PATCH 25/32] drop 3.9, add 3.12 --- .github/workflows/main.yml | 2 +- .pre-commit-config.yaml | 8 ++++---- environment-docs.yml | 2 +- setup.cfg | 2 +- setup.py | 3 +-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 363908c7..7da5b9b3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,7 +45,7 @@ jobs: shell: bash -l {0} strategy: matrix: - python-version: [ "3.9", "3.10", "3.11" ] # "3.12" + python-version: [ "3.10", "3.11", "3.12"] steps: - name: Harden Runner uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71faf6b9..a1a932d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: rev: v3.17.0 hooks: - id: pyupgrade - args: [ '--py39-plus' ] + args: [ '--py310-plus' ] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: @@ -30,7 +30,7 @@ repos: rev: 24.10.0 hooks: - id: black - args: [ '--target-version=py39' ] + args: [ '--target-version=py310' ] - repo: https://github.com/pycqa/flake8 rev: 7.1.1 hooks: @@ -46,10 +46,10 @@ repos: rev: 1.8.7 hooks: - id: nbqa-black - args: [ '--target-version=py39' ] + args: [ '--target-version=py310' ] additional_dependencies: [ 'black==24.8.0' ] - id: nbqa-pyupgrade - args: [ '--py39-plus' ] + args: [ '--py310-plus' ] additional_dependencies: [ 'pyupgrade==3.17.0' ] - id: nbqa-isort args: [ '--settings-file=setup.cfg' ] diff --git a/environment-docs.yml b/environment-docs.yml index c399e471..fbddfb07 100644 --- a/environment-docs.yml +++ b/environment-docs.yml @@ -2,7 +2,7 @@ name: finch channels: - conda-forge dependencies: - - python >=3.9,<3.13 + - python >=3.10,<3.13 - anyascii >=0.3.0 - birdy >=0.8.1 - ipython >=8.0.0 diff --git a/setup.cfg b/setup.cfg index d7b91233..01f9ea95 100644 --- a/setup.cfg +++ b/setup.cfg @@ -66,6 +66,6 @@ match = ((?!(test_|conftest|conf|locustfile)).)*\.py [isort] profile = black -py_version = 39 +py_version = 310 append_only = true known_first_party = finch,_common,_utils diff --git a/setup.py b/setup.py index 10661c2a..3916d754 100644 --- a/setup.py +++ b/setup.py @@ -29,10 +29,9 @@ "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", - # "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Atmospheric Science", ] From 30ac9bbfd51643048b467f36499c3dc526745fe7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 23:18:45 +0000 Subject: [PATCH 26/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- finch/processes/ensemble_utils.py | 22 +++++++++++----------- finch/processes/utils.py | 20 ++++++++++---------- finch/processes/wps_base.py | 2 +- finch/processes/wps_xclim_indices.py | 2 +- finch/processes/wpsio.py | 4 +--- tests/conftest.py | 2 +- 6 files changed, 25 insertions(+), 27 deletions(-) diff --git a/finch/processes/ensemble_utils.py b/finch/processes/ensemble_utils.py index c14b3a61..3dc889f4 100644 --- a/finch/processes/ensemble_utils.py +++ b/finch/processes/ensemble_utils.py @@ -75,9 +75,9 @@ class Dataset: # noqa: D101 model: str scenario: str frequency: str = "day" - realization: Optional[str] = None - date_start: Optional[str] = None - date_end: Optional[str] = None + realization: str | None = None + date_start: str | None = None + date_end: str | None = None @classmethod def from_filename(cls, filename, pattern): # noqa: D102 @@ -90,10 +90,10 @@ def from_filename(cls, filename, pattern): # noqa: D102 def file_is_required( filename: str, pattern: str, - model_lists: Optional[dict[str, list[str]]] = None, + model_lists: dict[str, list[str]] | None = None, variables: list[str] = None, scenario: str = None, - models: list[Union[str, tuple[str, int]]] = None, + models: list[str | tuple[str, int]] = None, ): """Parse metadata and filter datasets.""" file = Dataset.from_filename(filename, pattern) @@ -181,9 +181,9 @@ def _make_resource_input(url: str, workdir: str, local: bool): def get_datasets( dsconf: DatasetConfiguration, workdir: str, - variables: Optional[list[str]] = None, - scenario: Optional[str] = None, - models: Optional[list[str]] = None, + variables: list[str] | None = None, + scenario: str | None = None, + models: list[str] | None = None, ) -> list[PywpsInput]: """Parse a directory to find files and filters the list to return only the needed ones, as resource inputs. @@ -219,7 +219,7 @@ def get_datasets( return inputs -def _formatted_coordinate(value) -> Optional[str]: +def _formatted_coordinate(value) -> str | None: """Return the first float value. The value can be a comma separated list of floats or a single float. @@ -356,8 +356,8 @@ def make_file_groups(files_list: list[Path], variables: set) -> list[dict[str, P def make_ensemble( files: list[Path], percentiles: list[int], - spatavg: Optional[bool] = False, - region: Optional[dict] = None, + spatavg: bool | None = False, + region: dict | None = None, ) -> None: # noqa: D103 ensemble = ensembles.create_ensemble( files, realizations=[file.stem for file in files] diff --git a/finch/processes/utils.py b/finch/processes/utils.py index 78d952e8..3bdd3e61 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -4,13 +4,13 @@ import os import urllib.request import zipfile -from collections.abc import Generator, Iterable +from collections.abc import Callable, Generator, Iterable from dataclasses import dataclass, field from datetime import datetime, timedelta from itertools import chain from multiprocessing.pool import ThreadPool from pathlib import Path -from typing import Callable, Deque, Optional, Union +from typing import Deque, Optional, Union from urllib.error import URLError from urllib.parse import urlparse, urlunparse @@ -528,7 +528,7 @@ def is_opendap_url(url): return False -def single_input_or_none(inputs, identifier) -> Optional[str]: +def single_input_or_none(inputs, identifier) -> str | None: """Return first input item.""" try: return inputs[identifier][0].data @@ -537,10 +537,10 @@ def single_input_or_none(inputs, identifier) -> Optional[str]: def netcdf_file_list_to_csv( - netcdf_files: Union[list[Path], list[str]], + netcdf_files: list[Path] | list[str], output_folder, filename_prefix, - csv_precision: Optional[int] = None, + csv_precision: int | None = None, ) -> tuple[list[Path], str]: """Write csv files for a list of netcdf files. @@ -795,7 +795,7 @@ def fix_broken_time_index(ds: xr.Dataset): def dataset_to_netcdf( - ds: xr.Dataset, output_path: Union[Path, str], compression_level=0 + ds: xr.Dataset, output_path: Path | str, compression_level=0 ) -> None: """Write an :py:class:`xarray.Dataset` dataset to disk, optionally using compression.""" encoding = {} @@ -818,9 +818,9 @@ def dataset_to_netcdf( def update_history( hist_str: str, - *inputs_list: Union[xr.DataArray, xr.Dataset], - new_name: Optional[str] = None, - **inputs_kws: Union[xr.DataArray, xr.Dataset], + *inputs_list: xr.DataArray | xr.Dataset, + new_name: str | None = None, + **inputs_kws: xr.DataArray | xr.Dataset, ): r"""Return a history string with the timestamped message and the combination of the history of all inputs. @@ -866,7 +866,7 @@ def update_history( return merged_history -def valid_filename(name: Union[Path, str]) -> Union[Path, str]: +def valid_filename(name: Path | str) -> Path | str: """Remove unsupported characters from a filename. Returns diff --git a/finch/processes/wps_base.py b/finch/processes/wps_base.py index d852411e..d35d76f6 100644 --- a/finch/processes/wps_base.py +++ b/finch/processes/wps_base.py @@ -140,7 +140,7 @@ def make_xclim_indicator_process( def convert_xclim_inputs_to_pywps( params: dict, parent=None, parse_percentiles: bool = False -) -> tuple[list[Union[LiteralInput, ComplexInput]], list[Any]]: +) -> tuple[list[LiteralInput | ComplexInput], list[Any]]: r"""Convert xclim indicators properties to pywps inputs. If parse_percentiles is True, percentile variables (\*_per) are dropped and replaced by diff --git a/finch/processes/wps_xclim_indices.py b/finch/processes/wps_xclim_indices.py index f1486c45..ca31f46a 100644 --- a/finch/processes/wps_xclim_indices.py +++ b/finch/processes/wps_xclim_indices.py @@ -180,7 +180,7 @@ def _log(message, percentage): def _make_unique_drs_filename( - ds: xr.Dataset, existing_names: list[str], output_name: Optional[str] = None + ds: xr.Dataset, existing_names: list[str], output_name: str | None = None ): """Generate a drs filename: avoid overwriting files by adding a dash and a number to the filename.""" if output_name is not None: diff --git a/finch/processes/wpsio.py b/finch/processes/wpsio.py index 40c17e17..c568d79b 100644 --- a/finch/processes/wpsio.py +++ b/finch/processes/wpsio.py @@ -20,9 +20,7 @@ from .utils import PywpsInput, PywpsOutput, get_datasets_config -def copy_io( - io: Union[PywpsInput, PywpsOutput], **kwargs -) -> Union[PywpsInput, PywpsOutput]: +def copy_io(io: PywpsInput | PywpsOutput, **kwargs) -> PywpsInput | PywpsOutput: """Create a new input or output with modified parameters. Use this if you want one of the inputs in this file, but want to modify it. diff --git a/tests/conftest.py b/tests/conftest.py index e2c69a10..91a65540 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ def _create_test_dataset( cell_methods: str, standard_name: str, units: str, - seed: Optional[Union[int, float, np.ndarray]] = None, + seed: int | float | np.ndarray | None = None, missing: bool = False, ): """Create a synthetic dataset for variable. From a0fd763f089da3974c7eab061b115a9db86a54dd Mon Sep 17 00:00:00 2001 From: tlogan2000 Date: Thu, 12 Dec 2024 16:34:12 -0500 Subject: [PATCH 27/32] update changelog --- CHANGELOG.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f6985bf0..4d7a9a03 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,9 +10,10 @@ v0.13.0 (unreleased) * Permissions have been set for all workflows. * Pinned `cf-xarray`` below v0.9.3. * Reformatted and renamed `CHANGES.rst` to `CHANGELOG.rst`. -* Dropped support for Python 3.8. The supported versions are now Python 3.9, 3.10, and 3.11. +* Dropped support for Python 3.8 and 3.9. The supported versions are now Python 3.10, 3.11 and 3.12. * `black` has been updated to v2024.8.0, and coding conventions have been updated to Python3.9+. * Several dependencies now are pinned to baseline versions, including `anyascii`, `dask`, `ipython`, `matplotlib`, `nbsphinx` `numpy`, and `sphinxcontrib-bibtex`. +* Added `xscen` dependency. Now used in spatial averaging of ensemble processes. v0.12.1 (2024-06-25) -------------------- From 1705fb1f64678a2fd16f3a9ff90b90085e98ea8c Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:00:35 -0500 Subject: [PATCH 28/32] better formatting of YAML, synchronize dependencies --- .codacy.yml | 6 +- .github/workflows/main.yml | 4 +- .readthedocs.yml | 6 +- .yamllint.yaml | 33 +++++++++- environment-docs.yml | 2 +- environment.yml | 3 +- finch/datasets.yml | 124 ++++++++++++++++++------------------- tests/test_data.yml | 82 ++++++++++++------------ 8 files changed, 145 insertions(+), 115 deletions(-) diff --git a/.codacy.yml b/.codacy.yml index 9700d78b..34195902 100644 --- a/.codacy.yml +++ b/.codacy.yml @@ -1,8 +1,8 @@ --- engines: - pylint: - enabled: true - python_version: 3 + pylint: + enabled: true + python_version: 3 exclude_paths: - 'tests/**' - 'docs/source/conf.py' diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 13a92ea9..a174e7f9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,8 +44,8 @@ jobs: run: shell: bash -l {0} strategy: - matrix: - python-version: [ "3.10", "3.11", "3.12"] + matrix: + python-version: [ "3.10", "3.11", "3.12" ] steps: - name: Harden Runner uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2 diff --git a/.readthedocs.yml b/.readthedocs.yml index 2251c188..f4ab8407 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -13,12 +13,12 @@ sphinx: fail_on_warning: false # Optionally build your docs in additional formats such as PDF and ePub -formats: [] +formats: [ ] build: - os: ubuntu-22.04 + os: ubuntu-lts-latest tools: - python: "mambaforge-22.9" + python: "mambaforge-latest" conda: environment: environment-docs.yml diff --git a/.yamllint.yaml b/.yamllint.yaml index 2f3b4a3d..83bb52ad 100644 --- a/.yamllint.yaml +++ b/.yamllint.yaml @@ -1,8 +1,39 @@ --- rules: + + brackets: + forbid: false + min-spaces-inside: 1 + max-spaces-inside: 1 + + commas: + min-spaces-after: 1 + document-start: disable + + float-values: + require-numeral-before-decimal: true + + hyphens: + max-spaces-after: 1 + + indentation: + indent-sequences: whatever + spaces: consistent + + key-duplicates: + forbid-duplicated-merge-keys: true + line-length: - max: 120 + allow-non-breakable-words: true + allow-non-breakable-inline-mappings: true + max: 140 level: warning + + new-lines: + type: unix + + trailing-spaces: {} + truthy: disable diff --git a/environment-docs.yml b/environment-docs.yml index fbddfb07..34280ec3 100644 --- a/environment-docs.yml +++ b/environment-docs.yml @@ -13,4 +13,4 @@ dependencies: - sphinx >=7.0.0 - sphinxcontrib-bibtex >=2.6.0 - xarray >=2023.01.0,<2023.11.0 - - xclim =0.43 # remember to match xclim version in requirements_docs.txt as well + - xclim ==0.52.2 # remember to match xclim version in requirements_docs.txt as well diff --git a/environment.yml b/environment.yml index 4800a61b..221f2318 100644 --- a/environment.yml +++ b/environment.yml @@ -2,7 +2,7 @@ name: finch channels: - conda-forge dependencies: - - python >=3.9,<3.13 + - python >=3.10,<3.13 - pip >=24.2.0 - anyascii >=0.3.0 - cftime >=1.4.1 @@ -12,7 +12,6 @@ dependencies: - dask >=2023.5.1 - distributed - geopandas !=0.13.1 - - h5netcdf - netcdf4 <=1.6.5 - numpy >=1.23.0 - pandas >=2.2.0 diff --git a/finch/datasets.yml b/finch/datasets.yml index 0c47ceee..83b56221 100644 --- a/finch/datasets.yml +++ b/finch/datasets.yml @@ -6,8 +6,8 @@ candcs-u5: suffix: "*.nc" pattern: "{variable}_{frequency}_BCCAQv2+ANUSPLIN300_{model}_{scenario}_{realization}_{date_start}-{date_end}.nc" allowed_values: - scenario: [rcp26, rcp45, rcp85] - variable: [tasmin, tasmax, pr] + scenario: [ rcp26, rcp45, rcp85 ] + variable: [ tasmin, tasmax, pr ] model: &u5models - BNU-ESM @@ -39,18 +39,18 @@ candcs-u5: 24models: *u5models pcic12: # taken from: https://www.pacificclimate.org/data/statistically-downscaled-climate-scenarios - - [ ACCESS1-0 , r1i1p1 ] - - [ CCSM4 , r2i1p1 ] - - [ CNRM-CM5 , r1i1p1 ] - - [ CSIRO-Mk3-6-0 , r1i1p1 ] - - [ CanESM2 , r1i1p1 ] - - [ GFDL-ESM2G , r1i1p1 ] - - [ HadGEM2-CC , r1i1p1 ] - - [ HadGEM2-ES , r1i1p1 ] - - [ MIROC5 , r3i1p1 ] - - [ MPI-ESM-LR , r3i1p1 ] - - [ MRI-CGCM3 , r1i1p1 ] - - [ inmcm4 , r1i1p1 ] + - [ ACCESS1-0, r1i1p1 ] + - [ CCSM4, r2i1p1 ] + - [ CNRM-CM5, r1i1p1 ] + - [ CSIRO-Mk3-6-0, r1i1p1 ] + - [ CanESM2, r1i1p1 ] + - [ GFDL-ESM2G, r1i1p1 ] + - [ HadGEM2-CC, r1i1p1 ] + - [ HadGEM2-ES, r1i1p1 ] + - [ MIROC5, r3i1p1 ] + - [ MPI-ESM-LR, r3i1p1 ] + - [ MRI-CGCM3, r1i1p1 ] + - [ inmcm4, r1i1p1 ] # This is a copy of the above for backward compatibility reasons. bccaqv2: *candcsu5 @@ -62,7 +62,7 @@ candcs-u6: pattern: "{variable}_{frequency}_BCCAQv2+ANUSPLIN300_{model}_{scenario}_{realization}_{}_{date_start}-{date_end}.nc" allowed_values: scenario: [ ssp126, ssp245, ssp585 ] - variable: [ tasmin, tasmax, pr] + variable: [ tasmin, tasmax, pr ] model: - ACCESS-CM2 - ACCESS-ESM1-5 @@ -92,32 +92,32 @@ candcs-u6: - UKESM1-0-LL model_lists: 26models: - - ACCESS-CM2 - - ACCESS-ESM1-5 - - BCC-CSM2-MR - - CMCC-ESM2 - - CNRM-CM6-1 - - CNRM-ESM2-1 - - CanESM5 - - [EC-Earth3, r4i1p1f1] - - EC-Earth3-Veg - - FGOALS-g3 - - GFDL-ESM4 - - HadGEM3-GC31-LL - - INM-CM4-8 - - INM-CM5-0 - - IPSL-CM6A-LR - - [KACE-1-0-G, r2i1p1f1] - - KIOST-ESM - - MIROC-ES2L - - MIROC6 - - MPI-ESM1-2-HR - - MPI-ESM1-2-LR - - MRI-ESM2-0 - - NorESM2-LM - - NorESM2-MM - - TaiESM1 - - UKESM1-0-LL + - ACCESS-CM2 + - ACCESS-ESM1-5 + - BCC-CSM2-MR + - CMCC-ESM2 + - CNRM-CM6-1 + - CNRM-ESM2-1 + - CanESM5 + - [ EC-Earth3, r4i1p1f1 ] + - EC-Earth3-Veg + - FGOALS-g3 + - GFDL-ESM4 + - HadGEM3-GC31-LL + - INM-CM4-8 + - INM-CM5-0 + - IPSL-CM6A-LR + - [ KACE-1-0-G, r2i1p1f1 ] + - KIOST-ESM + - MIROC-ES2L + - MIROC6 + - MPI-ESM1-2-HR + - MPI-ESM1-2-LR + - MRI-ESM2-0 + - NorESM2-LM + - NorESM2-MM + - TaiESM1 + - UKESM1-0-LL humidex-daily: local: false @@ -125,28 +125,28 @@ humidex-daily: path: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/eccc/CCCS_humidex/Humidex/daily/catalog.html pattern: "{variable}_MBCn_ERA5-Land_{model}_{scenario}_{realization}_{date_start}-{date_end}.nc" allowed_values: - scenario: [ssp126, ssp245, ssp585] - variable: [HXmax] + scenario: [ ssp126, ssp245, ssp585 ] + variable: [ HXmax ] model: &hxmax_day_models - - MPI-ESM1-2-HR - - EC-Earth3 - - ACCESS-CM2 - - MIROC-ES2L - - EC-Earth3-Veg - - IPSL-CM6A-LR - - INM-CM5-0 - - FGOALS-g3 - - CanESM5 - - MIROC6 - - GISS-E2-1-G - - EC-Earth3-Veg-LR - - CNRM-ESM2-1 - - MPI-ESM1-2-LR - - INM-CM4-8 - - MRI-ESM2-0 - - CMCC-ESM2 - - ACCESS-ESM1-5 - - CNRM-CM6-1 + - MPI-ESM1-2-HR + - EC-Earth3 + - ACCESS-CM2 + - MIROC-ES2L + - EC-Earth3-Veg + - IPSL-CM6A-LR + - INM-CM5-0 + - FGOALS-g3 + - CanESM5 + - MIROC6 + - GISS-E2-1-G + - EC-Earth3-Veg-LR + - CNRM-ESM2-1 + - MPI-ESM1-2-LR + - INM-CM4-8 + - MRI-ESM2-0 + - CMCC-ESM2 + - ACCESS-ESM1-5 + - CNRM-CM6-1 model_lists: humidex_models: *hxmax_day_models diff --git a/tests/test_data.yml b/tests/test_data.yml index 9d70bc28..9205208f 100644 --- a/tests/test_data.yml +++ b/tests/test_data.yml @@ -5,8 +5,8 @@ test_single_cell: suffix: "*.nc" pattern: "{variable}_{frequency}_BCCAQv2+ANUSPLIN300_{model}_{scenario}_{realization}_{date_start}-{date_end}.nc" allowed_values: - scenario: [rcp26, rcp45, rcp85] - variable: [tasmin, tasmax, pr] + scenario: [ rcp26, rcp45, rcp85 ] + variable: [ tasmin, tasmax, pr ] model: &u5models - BNU-ESM @@ -37,19 +37,19 @@ test_single_cell: # The absence of realization specification implies r1 is taken. 24models: *u5models pcic12: - # taken from: https://www.pacificclimate.org/data/statistically-downscaled-climate-scenarios - - [ ACCESS1-0 , r1i1p1 ] - - [ CCSM4 , r2i1p1 ] - - [ CNRM-CM5 , r1i1p1 ] - - [ CSIRO-Mk3-6-0 , r1i1p1 ] - - [ CanESM2 , r1i1p1 ] - - [ GFDL-ESM2G , r1i1p1 ] - - [ HadGEM2-CC , r1i1p1 ] - - [ HadGEM2-ES , r1i1p1 ] - - [ MIROC5 , r3i1p1 ] - - [ MPI-ESM-LR , r3i1p1 ] - - [ MRI-CGCM3 , r1i1p1 ] - - [ inmcm4 , r1i1p1 ] + # taken from: https://www.pacificclimate.org/data/statistically-downscaled-climate-scenarios + - [ ACCESS1-0, r1i1p1 ] + - [ CCSM4, r2i1p1 ] + - [ CNRM-CM5, r1i1p1 ] + - [ CSIRO-Mk3-6-0, r1i1p1 ] + - [ CanESM2, r1i1p1 ] + - [ GFDL-ESM2G, r1i1p1 ] + - [ HadGEM2-CC, r1i1p1 ] + - [ HadGEM2-ES, r1i1p1 ] + - [ MIROC5, r3i1p1 ] + - [ MPI-ESM-LR, r3i1p1 ] + - [ MRI-CGCM3, r1i1p1 ] + - [ inmcm4, r1i1p1 ] test_subset: local: true @@ -58,8 +58,8 @@ test_subset: suffix: "*.nc" pattern: "{variable}_{model}_{scenario}_subset.nc" allowed_values: - scenario: [rcp26, rcp45] - variable: [tasmin, tasmax] + scenario: [ rcp26, rcp45 ] + variable: [ tasmin, tasmax ] model: - inmcm4 - bcc-csm1-1 @@ -72,28 +72,28 @@ test_humidex: suffix: "*.nc" pattern: "{variable}_MBCn_ERA5-Land_{model}_{scenario}_{realization}_{date_start}-{date_end}_sub.nc" allowed_values: - scenario: [ssp245, ssp585 ] - variable: [ HXmax ] - model: - &hxmax_day_sub - - MPI-ESM1-2-HR - - EC-Earth3 - - ACCESS-CM2 - - MIROC-ES2L - - EC-Earth3-Veg - - IPSL-CM6A-LR - - INM-CM5-0 - - FGOALS-g3 - - CanESM5 - - MIROC6 - - GISS-E2-1-G - - EC-Earth3-Veg-LR - - CNRM-ESM2-1 - - MPI-ESM1-2-LR - - INM-CM4-8 - - MRI-ESM2-0 - - CMCC-ESM2 - - ACCESS-ESM1-5 - - CNRM-CM6-1 + scenario: [ ssp245, ssp585 ] + variable: [ HXmax ] + model: + &hxmax_day_sub + - MPI-ESM1-2-HR + - EC-Earth3 + - ACCESS-CM2 + - MIROC-ES2L + - EC-Earth3-Veg + - IPSL-CM6A-LR + - INM-CM5-0 + - FGOALS-g3 + - CanESM5 + - MIROC6 + - GISS-E2-1-G + - EC-Earth3-Veg-LR + - CNRM-ESM2-1 + - MPI-ESM1-2-LR + - INM-CM4-8 + - MRI-ESM2-0 + - CMCC-ESM2 + - ACCESS-ESM1-5 + - CNRM-CM6-1 model_lists: - humidex_submods: *hxmax_day_sub + humidex_submods: *hxmax_day_sub From 143898b6b90baa8fd81cf1a979f55640c47766ef Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:09:46 -0500 Subject: [PATCH 29/32] update pandas version --- environment-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-docs.yml b/environment-docs.yml index 34280ec3..46327741 100644 --- a/environment-docs.yml +++ b/environment-docs.yml @@ -8,7 +8,7 @@ dependencies: - ipython >=8.0.0 - matplotlib-base >=3.5.0 - nbsphinx >=0.9.5 - - pandas >=1.5.3,<2.2.0 + - pandas >=2.2.0 - pywps >=4.5.1 - sphinx >=7.0.0 - sphinxcontrib-bibtex >=2.6.0 From 411a67ba8c9fbe928c49fc14ba20afa68c70a7be Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:13:48 -0500 Subject: [PATCH 30/32] update xarray version --- environment-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-docs.yml b/environment-docs.yml index 46327741..a07927ce 100644 --- a/environment-docs.yml +++ b/environment-docs.yml @@ -12,5 +12,5 @@ dependencies: - pywps >=4.5.1 - sphinx >=7.0.0 - sphinxcontrib-bibtex >=2.6.0 - - xarray >=2023.01.0,<2023.11.0 + - xarray >=2023.11.0 - xclim ==0.52.2 # remember to match xclim version in requirements_docs.txt as well From 1d99a07bafe34d79b89e60d0c286fd8ba125b243 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:19:12 -0500 Subject: [PATCH 31/32] add setuptools --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 221f2318..18397beb 100644 --- a/environment.yml +++ b/environment.yml @@ -22,6 +22,7 @@ dependencies: - pyyaml >=6.0.1 - scipy >=1.9.0 - sentry-sdk + - setuptools >=71.0.0 - siphon - xarray >=2023.11.0 - xclim =0.52.2 # remember to match xclim version in requirements_docs.txt as well From 8745a2999bf306ca18cb087155b7e5fa5a06ea27 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:31:36 -0500 Subject: [PATCH 32/32] allow connection to PyPI, fix setuptools version, specify Python3.10+ --- .github/workflows/docker-testing.yml | 1 + environment-docs.yml | 1 + setup.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker-testing.yml b/.github/workflows/docker-testing.yml index 4189760d..184895a8 100644 --- a/.github/workflows/docker-testing.yml +++ b/.github/workflows/docker-testing.yml @@ -27,6 +27,7 @@ jobs: allowed-endpoints: > auth.docker.io:443 conda.anaconda.org:443 + files.pythonhosted.org:443 github.com:443 production.cloudflare.docker.com:443 pypi.org:443 diff --git a/environment-docs.yml b/environment-docs.yml index a07927ce..f63965b7 100644 --- a/environment-docs.yml +++ b/environment-docs.yml @@ -10,6 +10,7 @@ dependencies: - nbsphinx >=0.9.5 - pandas >=2.2.0 - pywps >=4.5.1 + - setuptools >=71.0.0 - sphinx >=7.0.0 - sphinxcontrib-bibtex >=2.6.0 - xarray >=2023.11.0 diff --git a/setup.py b/setup.py index 3916d754..cadb5e86 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ here = os.path.abspath(os.path.dirname(__file__)) README = open(os.path.join(here, "README.rst")).read() CHANGES = open(os.path.join(here, "CHANGELOG.rst")).read() -REQUIRES_PYTHON = ">=3.9" +REQUIRES_PYTHON = ">=3.10" about = {} with open(os.path.join(here, "finch", "__version__.py")) as f: