Merge pull request #388 from gwmod/bathymetry

add RWS bathymetry to nlmod.read.rws
gwmod · Nov 18, 2024 · ecaa33b · ecaa33b
2 parents c4c3882 + 62cf72b
commit ecaa33b
Show file tree

Hide file tree

Showing 4 changed files with 165 additions and 10 deletions.
diff --git a/nlmod/dims/grid.py b/nlmod/dims/grid.py
@@ -132,7 +132,7 @@ def get_icell2d_from_xy(x, y, ds, gi=None, rotated=True):
 
     Raises
     ------
-
+    ValueError
         Raises a ValueError if the point is outside of the model grid.
 
     Returns
@@ -171,7 +171,6 @@ def xy_to_row_col(xy, ds):
     col : int
         number of the column value of a cell containing the xy point.
     """
-
     logger.warning(
         "nlmod.grid.xy_to_row_col is deprecated. "
         "Use nlmod.grid.get_row_col_from_xy instead"
@@ -203,7 +202,7 @@ def get_row_col_from_xy(x, y, ds, rotated=True, gi=None):
 
     Raises
     ------
-
+    ValueError
         Raises a ValueError if the point is outside of the model grid.
 
     Returns
@@ -242,7 +241,7 @@ def xyz_to_cid(xyz, ds=None, modelgrid=None):
     Parameters
     ----------
     xyz : list, tuple
-        coordinates of ta point.
+        coordinates of a point.
     ds : xr.Dataset
         model dataset.
     modelgrid : StructuredGrid, VertexGrid, optional

diff --git a/nlmod/read/rws.py b/nlmod/read/rws.py
@@ -1,13 +1,17 @@
 import datetime as dt
 import logging
 import os
+from typing import Optional, Union
 
 import geopandas as gpd
 import numpy as np
 import xarray as xr
+from rioxarray.merge import merge_arrays
+from tqdm.auto import tqdm
 
-from .. import cache, dims, util, NLMOD_DATADIR
-from . import jarkus
+from nlmod import NLMOD_DATADIR, cache, dims, util
+from nlmod.read import jarkus
+from nlmod.read.webservices import arcrest
 
 logger = logging.getLogger(__name__)
 
@@ -249,9 +253,7 @@ def calculate_sea_coverage(
     else:
         sea_dtm = sea_dtm.astype(float)
     if ds is not None:
-        sea = dims.structured_da_to_ds(
-            sea_dtm, ds, method=method, nodata=nodata
-        )
+        sea = dims.structured_da_to_ds(sea_dtm, ds, method=method, nodata=nodata)
         if (sea == nodata).any():
             logger.info(
                 "The dtm data does not cover the entire model domain."
@@ -260,3 +262,139 @@ def calculate_sea_coverage(
             sea = sea.where(sea != nodata, 1)
         return sea
     return sea_dtm
+
+
+def get_gdr_configuration() -> dict:
+    """Get configuration for GDR data.
+
+    Note: Currently only includes configuration for bathymetry data. Other datasets
+    can be added in the future. See
+    https://geo.rijkswaterstaat.nl/arcgis/rest/services/GDR/ for available data.
+
+    Returns
+    -------
+    config : dict
+        configuration dictionary containing urls and layer numbers for GDR data.
+    """
+    config = {}
+    config["bodemhoogte"] = {
+        "index": {
+            "url": (
+                "https://geo.rijkswaterstaat.nl/arcgis/rest/services/GDR/"
+                "bodemhoogte_index/FeatureServer"
+            )
+        },
+        "20m": {"layer": 0},
+        "1m": {"layer": 2},
+    }
+    return config
+
+
+def get_bathymetry_gdf(
+    resolution: str = "20m",
+    extent: Optional[list[float]] = None,
+    config: Optional[dict] = None,
+) -> gpd.GeoDataFrame:
+    """Get bathymetry dataframe from RWS.
+
+    Note that the 20m resolution does not contain bathymetry data for the major rivers.
+    If you need the bathymetry of the major rivers, use the 1m resolution.
+
+    Parameters
+    ----------
+    resolution : str, optional
+        resolution of the bathymetry data, "1m" or "20m". The default is "20m".
+    extent : tuple, optional
+        extent of the model domain. The default is None.
+    config : dict, optional
+        configuration dictionary containing urls and layer numbers for GDR data. The
+        default is None, which uses the default configuration provided by
+        the function `get_gdr_configuration()`.
+    """
+    if config is None:
+        config = get_gdr_configuration()
+    url = config["bodemhoogte"]["index"]["url"]
+    layer = config["bodemhoogte"][resolution]["layer"]
+    return arcrest(url, layer, extent=extent)
+
+
+@cache.cache_netcdf()
+def get_bathymetry(
+    extent: list[float],
+    resolution: str = "20m",
+    res: Optional[float] = None,
+    method: Optional[str] = None,
+    chunks: Optional[Union[str, dict[str, int]]] = "auto",
+    config: Optional[dict] = None,
+) -> xr.DataArray:
+    """Get bathymetry data from RWS.
+
+    Bathymetry is available at 20m resolution and at 1m resolution. The 20m
+    resolution is available for large water bodies, but not in the major rivers.
+    The 1m dataset covers the major waterbodies across all of the Netherlands.
+
+    Parameters
+    ----------
+    extent : tuple
+        extent of the model domain
+    resolution : str, optional
+        resolution of the bathymetry data, "1m" or "20m". The default is "20m".
+    res : float, optional
+        resolution of the output data array. The default is None, which uses
+        resolution of the input datasets. Resampling method is provided by the method
+        kwarg.
+    method : str, optional
+        resampling method. The default is None. See rasterio.enums.Resampling for
+        supported methods. Examples are ["min", "max", "mean", "nearest"].
+    chunks : dict, optional
+        chunks for the output data array. The default is "auto", which lets xarray/dask
+        pick the chunksize. Set to None to avoid chunking.
+    config : dict, optional
+        configuration dictionary containing urls and layer numbers for GDR data. The
+        default is None, which uses the default configuration provided by
+        the function `get_gdr_configuration()`.
+
+    Returns
+    -------
+    bathymetry : xr.DataArray
+        bathymetry data
+    """
+    gdf = get_bathymetry_gdf(resolution=resolution, extent=extent, config=config)
+
+    xmin, xmax, ymin, ymax = extent
+    dataarrays = []
+
+    for _, row in tqdm(
+        gdf.iterrows(), desc="Downloading bathymetry", total=gdf.index.size
+    ):
+        url = row["geotiff"]
+        # NOTE: link to 20m dataset is incorrect in the index
+        if resolution == "20m":
+            url = url.replace("Noordzee_20_LAT", "bodemhoogte_20mtr")
+        ds = xr.open_dataset(url, engine="rasterio")
+        ds = ds.assign_coords({"y": ds["y"].round(0), "x": ds["x"].round(0)})
+        da = (
+            ds["band_data"]
+            .sel(band=1, x=slice(xmin, xmax), y=slice(ymax, ymin))
+            .drop_vars("band")
+        )
+        if chunks:
+            da = da.chunk(chunks)
+        dataarrays.append(da)
+
+    if len(dataarrays) > 1:
+        da = merge_arrays(
+            dataarrays,
+            bounds=[xmin, ymin, xmax, ymax],
+            res=res,
+            method=method,
+        )
+    else:
+        da = dataarrays[0]
+        if res is not None:
+            da = da.rio.reproject(
+                da.rio.crs,
+                res=res,
+                resampling=method,
+            )
+    return da
diff --git a/nlmod/read/waterboard.py b/nlmod/read/waterboard.py
@@ -9,8 +9,13 @@
 
 def get_polygons(**kwargs):
     """Get the location of the Dutch Waterboards as a Polygon GeoDataFrame."""
-    url = "https://services.arcgis.com/nSZVuSZjHpEZZbRo/arcgis/rest/services/Waterschapsgrenzen/FeatureServer"
+    url = "https://services.arcgis.com/nSZVuSZjHpEZZbRo/ArcGIS/rest/services/Waterschapsgrenzen/FeatureServer"
     layer = 0
+    # NOTE: Referer needed to avoid Unauthorized Access error (500)
+    if "headers" not in kwargs:
+        kwargs["headers"] = {
+            "Referer": "https://services.arcgis.com/nSZVuSZjHpEZZbRo/ArcGIS/rest/services"
+        }
     ws = webservices.arcrest(url, layer, **kwargs)
     # remove different prefixes
     ws["waterschap"] = ws["waterschap"].str.replace("HH van ", "")

diff --git a/tests/test_028_rws_bathymetry.py b/tests/test_028_rws_bathymetry.py
@@ -0,0 +1,13 @@
+import pytest
+import nlmod
+
+
+@pytest.mark.parametrize("resolution", [1, 20])
+def test_bathymetry(resolution):
+    xmin = 25_000.0
+    ymin = 410_000.0
+    xmax = xmin + 2 * resolution
+    ymax = ymin + 2 * resolution
+    extent = [xmin, xmax, ymin, ymax]
+    nlmod.read.rws.get_bathymetry(extent, resolution=f"{resolution}m")
+