Skip to content

Commit

Permalink
Knmi stations (#174)
Browse files Browse the repository at this point in the history
* Add option to only use most common station

Slow determination of stations (locations.geo.get_nearest_point) is removed

* solve minor bug and add test for most_common_station=True

* Set gridgen files in a separate directory. Solves issue #158

* minor changes

* Make most_common_station work with structured grids as well

* Create gridgen directory when it does not exist

* Solve fiona.errors.DriverError:  not recognized as a supported file format.

* Remove rasterstats as a dependency for tests and docs

* Codacy stuff
  • Loading branch information
rubencalje authored and dbrakenhoff committed May 9, 2023
1 parent ad39be6 commit f1c920e
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 42 deletions.
2 changes: 1 addition & 1 deletion docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,4 @@ potential solutions.
On top of that there are some optional dependencies, only needed (and imported) in a single method:

- bottleneck (used in calculate_gxg)
- geocube (used in add_min_ahn_to_gdf)
- geocube (used in add_min_ahn_to_gdf)
1 change: 0 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ Ipython
ipykernel
nbsphinx
netCDF4==1.5.7
rasterstats
geocube
bottleneck
contextily
2 changes: 1 addition & 1 deletion nlmod/dims/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import logging
import os
import warnings

import os
import flopy
import geopandas as gpd
import numpy as np
Expand Down
4 changes: 4 additions & 0 deletions nlmod/dims/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ def calculate_thickness(ds, top="top", bot="botm"):
else:
raise ValueError("2d top should have same last dimension as bot")
if isinstance(ds[bot], xr.DataArray):
if hasattr(ds[bot], "long_name"):
thickness.attrs["long_name"] = "thickness"
if hasattr(ds[bot], "standard_name"):
thickness.attrs["standard_name"] = "thickness_of_layer"
if hasattr(ds[bot], "units"):
if ds[bot].units == "mNAP":
thickness.attrs["units"] = "m"
Expand Down
9 changes: 5 additions & 4 deletions nlmod/dims/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,11 +375,12 @@ def vertex_da_to_ds(da, ds, method="nearest"):
Parameters
----------
da : xaray.DataArray
A vertex DataArray. When the DataArray does not have 'icell2d' as a
dimension, the original DataArray is retured. The DataArray da can
contain other dimensions as well (for example 'layer' or time'' ).
A vertex DataArray. When the DataArray does not have 'icell2d' as a dimension,
the original DataArray is returned. The DataArray da can contain other dimensions
as well (for example 'layer' or 'time').
ds : xarray.Dataset
The model dataset with coordinates x and y.
The model dataset to which the DataArray needs to be resampled, with coordinates
x and y.
method : str, optional
The interpolation method, see griddata. The default is "nearest".
Expand Down
72 changes: 43 additions & 29 deletions nlmod/read/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

import hydropandas as hpd
from hydropandas.io import knmi as hpd_knmi
import numpy as np
import pandas as pd

Expand All @@ -13,7 +14,7 @@


@cache.cache_netcdf
def get_recharge(ds, method="linear"):
def get_recharge(ds, method="linear", most_common_station=False):
"""add multiple recharge packages to the groundwater flow model with knmi
data by following these steps:
Expand Down Expand Up @@ -41,6 +42,9 @@ def get_recharge(ds, method="linear"):
If 'linear', calculate recharge by subtracting evaporation from precipitation.
If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
The defaults is 'linear'.
most_common_station : bool, optional
When True, only download data from the station that is most common in the model
area. The default is False.
Returns
-------
Expand All @@ -51,7 +55,7 @@ def get_recharge(ds, method="linear"):
if "time" not in ds:
raise (
AttributeError(
"'Dataset' object has no attribute 'time'. "
"'Dataset' object has no 'time' dimension. "
"Please run nlmod.time.set_ds_time()"
)
)
Expand All @@ -71,50 +75,45 @@ def get_recharge(ds, method="linear"):
shape = [len(ds_out[dim]) for dim in dims]

locations, oc_knmi_prec, oc_knmi_evap = get_knmi_at_locations(
ds, start=start, end=end
)

# add closest precipitation and evaporation measurement station to each cell
locations[["prec_point", "distance"]] = locations.geo.get_nearest_point(
oc_knmi_prec
)
locations[["evap_point", "distance"]] = locations.geo.get_nearest_point(
oc_knmi_evap
ds, start=start, end=end, most_common_station=most_common_station
)

if method in ["linear"]:
ds_out["recharge"] = dims, np.zeros(shape)

# find unique combination of precipitation and evaporation station
unique_combinations = locations.drop_duplicates(["prec_point", "evap_point"])[
["prec_point", "evap_point"]
unique_combinations = locations.drop_duplicates(["stn_rd", "stn_ev24"])[
["stn_rd", "stn_ev24"]
].values

for prec_stn, evap_stn in unique_combinations:
for stn_rd, stn_ev24 in unique_combinations:
# get locations with the same prec and evap station
loc_sel = locations.loc[
(locations["prec_point"] == prec_stn)
& (locations["evap_point"] == evap_stn)
]
mask = (locations["stn_rd"] == stn_rd) & (locations["stn_ev24"] == stn_ev24)
loc_sel = locations.loc[mask]

# calculate recharge time series
prec = oc_knmi_prec.loc[prec_stn, "obs"]["RD"].resample("D").nearest()
evap = oc_knmi_evap.loc[evap_stn, "obs"]["EV24"].resample("D").nearest()
index = oc_knmi_prec.index[oc_knmi_prec.station == stn_rd][0]
prec = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
index = oc_knmi_evap.index[oc_knmi_evap.station == stn_ev24][0]
evap = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
ts = (prec - evap).dropna()
ts.name = f"{prec.name}-{evap.name}"

_add_ts_to_ds(ts, loc_sel, "recharge", ds_out)

elif method == "separate":
ds_out["recharge"] = dims, np.zeros(shape)
for stn in locations["prec_point"].unique():
ts = oc_knmi_prec.loc[stn, "obs"]["RD"]
loc_sel = locations.loc[(locations["prec_point"] == stn)]
for stn in locations["stn_rd"].unique():
index = oc_knmi_prec.index[oc_knmi_prec.station == stn][0]
ts = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_rd"] == stn)]
_add_ts_to_ds(ts, loc_sel, "recharge", ds_out)

ds_out["evaporation"] = dims, np.zeros(shape)
for stn in locations["evap_point"].unique():
ts = oc_knmi_evap.loc[stn, "obs"]["EV24"]
loc_sel = locations.loc[(locations["evap_point"] == stn)]
for stn in locations["stn_ev24"].unique():
index = oc_knmi_evap.index[oc_knmi_evap.station == stn][0]
ts = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_ev24"] == stn)]
_add_ts_to_ds(ts, loc_sel, "evaporation", ds_out)
else:
raise (Exception(f"Unknown method: {method}"))
Expand Down Expand Up @@ -225,7 +224,7 @@ def get_locations_structured(ds):
return locations


def get_knmi_at_locations(ds, start="2010", end=None):
def get_knmi_at_locations(ds, start="2010", end=None, most_common_station=False):
"""get knmi data at the locations of the active grid cells in ds.
Parameters
Expand Down Expand Up @@ -258,14 +257,29 @@ def get_knmi_at_locations(ds, start="2010", end=None):
locations = get_locations_vertex(ds)
else:
raise ValueError("gridtype should be structured or vertex")
locations["stn_rd"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="EV24")
if most_common_station:
if ds.gridtype == "structured":
# set the most common station to all locations
locations["stn_rd"] = locations["stn_rd"].value_counts().idxmax()
locations["stn_ev24"] = locations["stn_ev24"].value_counts().idxmax()
else:
# set the station with the largest area to all locations
locations["area"] = ds["area"].loc[locations.index]
locations["stn_rd"] = locations.groupby("stn_rd").sum()["area"].idxmax()
locations["stn_ev24"] = locations.groupby("stn_ev24").sum()["area"].idxmax()

stns_rd = locations["stn_rd"].unique()
stns_ev24 = locations["stn_ev24"].unique()

# get knmi data stations closest to any grid cell
oc_knmi_prec = hpd.ObsCollection.from_knmi(
locations=locations, starts=[start], ends=[end], meteo_vars=["RD"]
stns=stns_rd, starts=[start], ends=[end], meteo_vars=["RD"]
)

oc_knmi_evap = hpd.ObsCollection.from_knmi(
locations=locations, starts=[start], ends=[end], meteo_vars=["EV24"]
stns=stns_ev24, starts=[start], ends=[end], meteo_vars=["EV24"]
)

return locations, oc_knmi_prec, oc_knmi_evap
19 changes: 13 additions & 6 deletions nlmod/read/webservices.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import xml.etree.ElementTree as ET

from io import BytesIO
import geopandas as gpd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -138,6 +138,7 @@ def wfs(
paged=True,
max_record_count=None,
driver="GML",
timeout=120,
):
"""Download data from a wfs server."""
params = dict(version=version, request="GetFeature")
Expand Down Expand Up @@ -191,7 +192,9 @@ def add_constrains(elem, constraints):

# get the number of features
params["resultType"] = "hits"
r = requests.get(url, params=params, timeout=120)
r = requests.get(url, params=params, timeout=timeout)
if not r.ok:
raise (Exception(f"Request not successful: {r.url}"))
params.pop("resultType")
root = ET.fromstring(r.text)
if "ExceptionReport" in root.tag:
Expand All @@ -209,13 +212,17 @@ def add_constrains(elem, constraints):
params["count"] = max_record_count
for ip in range(int(np.ceil(n / max_record_count))):
params["startindex"] = ip * max_record_count
req_url = requests.Request("GET", url, params=params).prepare().url
gdfs.append(gpd.read_file(req_url, driver=driver))
r = requests.get(url, params=params, timeout=timeout)
if not r.ok:
raise (Exception(f"Request not successful: {r.url}"))
gdfs.append(gpd.read_file(BytesIO(r.content), driver=driver))
gdf = pd.concat(gdfs).reset_index(drop=True)
else:
# download all features in one go
req_url = requests.Request("GET", url, params=params).prepare().url
gdf = gpd.read_file(req_url, driver=driver)
r = requests.get(url, params=params, timeout=timeout)
if not r.ok:
raise (Exception(f"Request not successful: {r.url}"))
gdf = gpd.read_file(BytesIO(r.content), driver=driver)

return gdf

Expand Down
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
nbconvert>=6.4.5
netCDF4==1.5.7
geocube
gdown
bottleneck
contextily
8 changes: 8 additions & 0 deletions tests/test_005_external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ def test_get_recharge():
ds.update(nlmod.read.knmi.get_recharge(ds))


def test_get_recharge_most_common():
    """Test get_recharge with most_common_station=True on a model with sea.

    Fixes the 'reacharge' typo in the test name; pytest discovers tests by the
    ``test_`` prefix, so the rename is safe.
    """
    # model with sea
    ds = test_001_model.get_ds_from_cache("basic_sea_model")

    # add knmi recharge to the model dataset, downloading data only from the
    # station that is most common in the model area (most_common_station=True)
    ds.update(nlmod.read.knmi.get_recharge(ds, most_common_station=True))


def test_get_recharge_steady_state():
# model with sea
ds = test_001_model.get_ds_from_cache("basic_sea_model")
Expand Down

0 comments on commit f1c920e

Please sign in to comment.