From a28ef50f3976c5c6f7b59fe757e52f4bd253f7ac Mon Sep 17 00:00:00 2001 From: Chris Marsh Date: Tue, 10 Dec 2024 11:14:54 -0600 Subject: [PATCH] Update docs with new NetCDF features --- docs/configuration.rst | 31 ++++- docs/forcing.rst | 249 ++++++++++++++++++++++++++++++----------- 2 files changed, 208 insertions(+), 72 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index fef3dd0b..bd666d6f 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -598,6 +598,11 @@ forcing Input forcing can be either a ASCII timeseries or a NetCDF. Please see :ref:`forcing` for more details. +.. warning:: + + Including json sub-config files in this section is not supported + + Input forcing stations do not need to be located within the simulation domain. Therefore they can act as ‘virtual stations’ so-as to use reanalysis data, or met stations located outside of the basin. @@ -776,17 +781,35 @@ Example NetCDF ~~~~~~~ -The use NetCDF as input creates virtual stations at the cell-centres. The NetCDF file is lazy loaded as required for each triangle, so only the values required are loaded. -The variable names, like for ASCII inputs, needs to correspond to the values expected by the filters. +The use a netCDF file, set ``forcing:use_netcdf=true`, and choose the netcdf file. + +.. code:: + + "forcing": + { + + "use_netcdf": true, + "file":"forcing_file.nc", + } +If a multipart file is to be used, simply replace the netcdf with the json list. + +.. code:: + + "forcing": + { + + "use_netcdf": true, + "file":"metadata.json", + } + +Please see the NetCDF :ref:`forcing` section for more details. .. warning:: NetCDF and ``point_mode`` are not supported. - - Filters ######## diff --git a/docs/forcing.rst b/docs/forcing.rst index e3292577..e2ec7e86 100644 --- a/docs/forcing.rst +++ b/docs/forcing.rst @@ -73,8 +73,40 @@ Various conversion scripts for other models’ input/output are located in the ` NetCDF ******** -The use NetCDF as input creates virtual stations at the cell-centres. The NetCDF file is lazy loaded as required for each triangle, so only the values required are loaded. -The variable names, like for ASCII inputs, needs to correspond to the values expected by the filters. +When using a NetCDF file as input, this creates virtual stations at the cell-centres. Only the stations that are +required by the mesh subset are loaded as specified by the ``option:station_N_nearest``, and timesteps are lazy loaded +as required. This ensures the memory foot print is low. In MPI mode, only the stations required for the current ranks' +mesh subset are loaded. The internal CHM variable names are mapped from CF compliant ``standard_name`` variable +attributes. If a variable doesn't have a ``standard_name`` or is not in this table, it is ignored and not loaded. + +Currently the following mappings are supported: + ++---------------+----------------------------------+---------------------+ +| Short Name | Standard Name | Units | ++===============+==================================+=====================+ +| t | air_temperature | C | ++---------------+----------------------------------+---------------------+ +| rh | relative_humidity | % | ++---------------+----------------------------------+---------------------+ +| t_lapse_rate | air_temperature_lapse_rate | C/m | ++---------------+----------------------------------+---------------------+ +| vw_dir | wind_from_direction | degrees from north | ++---------------+----------------------------------+---------------------+ +| U_R | wind_speed | m/s | ++---------------+----------------------------------+---------------------+ +| press | surface_air_pressure | Pa | ++---------------+----------------------------------+---------------------+ +| Qli | surface_downwelling_longwave_flux| W/m^2 | ++---------------+----------------------------------+---------------------+ +| Qsi | surface_downwelling_shortwave_flux| W/m^2 | ++---------------+----------------------------------+---------------------+ +| z | geopotential_height | m | ++---------------+----------------------------------+---------------------+ +| p | precipitation_amount | mm | ++---------------+----------------------------------+---------------------+ + + + An example of this is shown below, where each black point is a virtual station, representing the center for a NetCDF grid cell from a NWP product. @@ -84,27 +116,92 @@ An example of this is shown below, where each black point is a virtual station, NetCDF and ``point_mode`` are not supported. -Grid +The stations for each rank are written to the ouput folder in both shape file (.shp) as well as paraview point (.vtp). + +Notes ~~~~~~ -- the nc file is a regular grid of x,y values -- WGS84 lat/long -- consistent grid between model timesteps -- The underlying grid is specified in coordinates: ``ygrid_0`` and ``xgrid_0`` -- The lat/long is specified in variables ``gridlat_0`` and ``gridlon_0`` -- The elevation of the observation is given in ``HGT_P0_L1_GST`` (m) - -Timesteps -~~~~~~~~~~ - -- at least two timesteps -- named ``datetime`` -- time is in UTC+0 -- the difference between these is used to determine model dt -- timesteps are offsets from an epoch (format ``YYYY-mm-dd HH:MM:SS`` or ``YYYY-mm-ddTHH:MM:SS``) -- units are hours, minutes, seconds -- This is specified as the units: ``datetime:units = "hours since 2017-09-01 06:00:00" ;`` +- The Dimension and variable that corresponds to the dimension (i.e., the coordinate) must have the same name +- lat/lon can be either 1D or 2D -- 1D prefered due to being CF-compliant +- Assumed that the netcdf is in EPSG:4326 / WGS84 lat/lon +- Consistent grid between model timesteps, e.g., regular 1-hour forcing +- Time is in UTC+0 +- If a single timestep is present in a file, then the ``time:delta_t=`` and ``time:delta_t_units="s"`` +must be added to inform the model about the integration length of the forcing file. +- The elevation is generally given by the geopotential height, assumed to be in metres +- Time units: ``time:units = "hours since 2017-09-01 06:00:00" ;`` - offset are given as ``int64`` +- Variable units are not currently handled + +Multipart NetCDF files +~~~~~~~~~~~~~~~~~~~~~~~~ + +Multi-part netcdf files are allowed. This handles cases where the forcing data are split into a file per n-timesteps, +such as one netcdf file per day. This is specified as a ``.json`` file that has a list of files: + +.. code:: + + [ + { + "start_time": "20241101T000000", + "end_time": "20241101T000000", + "file_name": "/Users/cbm038/Documents/science/data/hrdps/CF-20241101T0000.nc" + }, + { + "start_time": "20241101T010000", + "end_time": "20241101T010000", + "file_name": "/Users/cbm038/Documents/science/data/hrdps/CF-20241101T0100.nc" + }, + { + "start_time": "20241101T020000", + "end_time": "20241101T020000", + "file_name": "/Users/cbm038/Documents/science/data/hrdps/CF-20241101T0200.nc" + }, + { + "start_time": "20241101T030000", + "end_time": "20241101T030000", + "file_name": "/Users/cbm038/Documents/science/data/hrdps/CF-20241101T0300.nc" + }, + { + "start_time": "20241101T040000", + "end_time": "20241101T040000", + "file_name": "/Users/cbm038/Documents/science/data/hrdps/CF-20241101T0400.nc" + }, + { + "start_time": "20241101T050000", + "end_time": "20241101T050000", + "file_name": "/Users/cbm038/Documents/science/data/hrdps/CF-20241101T0500.nc" + } + ] + +This json can be generated as follows: +.. code:: + + import glob + import natsort + import xarray as xr + + file_paths = natsort.natsorted(glob.glob('CF-2024*.nc')) + metadata_list = [] + for file_path in file_paths: + ds = xr.open_mfdataset(file_path) + + metadata = { + "start_time": str(ds.time.min().dt.strftime("%Y%m%dT%H%M%S").values), # Convert to string for JSON serialization + "end_time": str(ds.time.max().dt.strftime("%Y%m%dT%H%M%S").values), + "file_name": str(Path(file_path)) + } + + metadata_list.append(metadata) + + ds.close() + + print(metadata_list) + with open('metdata.json', 'w') as f: + json.dump(metadata_list, f, indent=4) + + +Instead of a netcdf file, choose the json file. Schema ~~~~~~~ @@ -113,51 +210,67 @@ In detail the following is the schema for the required NetCDF files: .. code:: - dimensions: - datetime = UNLIMITED ; - ygrid_0 = int ; - xgrid_0 = int ; - - variables: - double VAR_NAME(datetime, ygrid_0, xgrid_0) ; - VAR_NAME:_FillValue = NaN ; - VAR_NAME:coordinates = "gridlat_0 gridlon_0" ; - - double HGT_P0_L1_GST(datetime, ygrid_0, xgrid_0) ; - HGT_P0_L1_GST:_FillValue = NaN ; - HGT_P0_L1_GST:coordinates = "gridlat_0 gridlon_0" ; - - - int64 datetime(datetime) ; - datetime:standard_name = "time" ; - datetime:long_name = "Validity time" ; - datetime:axis = "T" ; - datetime:units = "hours since 2017-09-01 06:00:00" ; - datetime:calendar = "proleptic_gregorian" ; - - double gridlat_0(ygrid_0, xgrid_0) ; - gridlat_0:_FillValue = NaN ; - gridlat_0:long_name = "latitude" ; - gridlat_0:standard_name = "latitude" ; - gridlat_0:units = "degrees_north" ; - - double gridlon_0(ygrid_0, xgrid_0) ; - gridlon_0:_FillValue = NaN ; - gridlon_0:long_name = "longitude" ; - gridlon_0:standard_name = "longitude" ; - gridlon_0:units = "degrees_east" ; - - - double xgrid_0(xgrid_0) ; - xgrid_0:_FillValue = NaN ; - xgrid_0:long_name = "longitude ; - xgrid_0:standard_name = "longitude" ; - xgrid_0:units = "degrees" ; - xgrid_0:axis = "X" ; - - double ygrid_0(ygrid_0) ; - ygrid_0:_FillValue = NaN ; - ygrid_0:long_name = "latitude" ; - ygrid_0:standard_name = "latitude" ; - ygrid_0:units = "degrees" ; - ygrid_0:axis = "Y" ; \ No newline at end of file +$ ncdump -h /Users/cbm038/Documents/science/data/hrdps/CF-20241101T0000.nc + netcdf CF-20241101T0000 { + dimensions: + time = 1 ; + latitude = 1309 ; + longitude = 3384 ; + string1 = 1 ; + variables: + float FI(time, latitude, longitude) ; + FI:_FillValue = NaNf ; + FI:long_name = "Incoming longwave radiation at the surface" ; + FI:standard_name = "surface_downwelling_longwave_flux" ; + FI:units = "W/m2" ; + FI:coordinates = "time latitude longitude" ; + char crs(time, string1) ; + crs:grid_mapping_name = "latitude_longitude" ; + crs:long_name = "CRS definition" ; + crs:longitude_of_prime_meridian = 0. ; + crs:semi_major_axis = 6378137. ; + crs:inverse_flattening = 298.257223563 ; + crs:spatial_ref = "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]" ; + crs:crs_wkt = "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]" ; + crs:GeoTransform = "-152.7684898376465 0.03312879297980011 0 70.62275695800781 0 -0.03312879297980011 " ; + crs:coordinates = "time latitude longitude" ; + double latitude(latitude) ; + latitude:_FillValue = NaN ; + latitude:standard_name = "latitude" ; + latitude:long_name = "latitude" ; + latitude:units = "degrees_north" ; + double longitude(longitude) ; + longitude:_FillValue = NaN ; + longitude:standard_name = "longitude" ; + longitude:long_name = "longitude" ; + longitude:units = "degrees_east" ; + int64 time(time) ; + time:standard_name = "time" ; + time:delta_t = 3600LL ; + time:delta_t_units = "s" ; + time:units = "days since 2024-11-01" ; + time:calendar = "proleptic_gregorian" ; + double wind_speed(time, latitude, longitude) ; + wind_speed:_FillValue = NaN ; + wind_speed:standard_name = "wind_speed" ; + wind_speed:long_name = "Surface wind speed" ; + wind_speed:units = "m s**-1" ; + wind_speed:coordinates = "time latitude longitude" ; + double wind_from_direction(time, latitude, longitude) ; + wind_from_direction:_FillValue = NaN ; + wind_from_direction:standard_name = "wind_from_direction" ; + wind_from_direction:long_name = "Surface wind direction" ; + wind_from_direction:units = "degree" ; + wind_from_direction:coordinates = "time latitude longitude" ; + float RH(time, latitude, longitude) ; + RH:_FillValue = NaNf ; + RH:standard_name = "relative_humidity" ; + RH:long_name = "surface relative humidity" ; + RH:units = "1" ; + RH:coordinates = "time latitude longitude" ; + + // global attributes: + :Conventions = "CF-1.7" ; + } + +