Add option to only use most common station

Slow determination of stations (locations.geo.get_nearest_point) is removed
gwmod · May 8, 2023 · 07aa8f3 · 07aa8f3
1 parent 69cdafd
commit 07aa8f3
Showing 1 changed file with 40 additions and 28 deletions.
diff --git a/nlmod/read/knmi.py b/nlmod/read/knmi.py
@@ -2,6 +2,7 @@
 import logging
 
 import hydropandas as hpd
+from hydropandas.io import knmi as hpd_knmi
 import numpy as np
 import pandas as pd
 
@@ -13,7 +14,7 @@
 
 
 @cache.cache_netcdf
-def get_recharge(ds, method="linear"):
+def get_recharge(ds, method="linear", most_common_station=False):
     """add multiple recharge packages to the groundwater flow model with knmi
     data by following these steps:
 
@@ -41,6 +42,9 @@ def get_recharge(ds, method="linear"):
         If 'linear', calculate recharge by subtracting evaporation from precipitation.
         If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
         The defaults is 'linear'.
+    most_common_station : bool, optional
+        When True, only download data from the station that is most common in the model
+        area. The default is False
 
     Returns
     -------
@@ -71,50 +75,45 @@ def get_recharge(ds, method="linear"):
     shape = [len(ds_out[dim]) for dim in dims]
 
     locations, oc_knmi_prec, oc_knmi_evap = get_knmi_at_locations(
-        ds, start=start, end=end
-    )
-
-    # add closest precipitation and evaporation measurement station to each cell
-    locations[["prec_point", "distance"]] = locations.geo.get_nearest_point(
-        oc_knmi_prec
-    )
-    locations[["evap_point", "distance"]] = locations.geo.get_nearest_point(
-        oc_knmi_evap
+        ds, start=start, end=end, most_common_station=most_common_station
     )
 
     if method in ["linear"]:
         ds_out["recharge"] = dims, np.zeros(shape)
 
         # find unique combination of precipitation and evaporation station
-        unique_combinations = locations.drop_duplicates(["prec_point", "evap_point"])[
-            ["prec_point", "evap_point"]
+        unique_combinations = locations.drop_duplicates(["stn_rd", "stn_ev24"])[
+            ["stn_rd", "stn_ev24"]
         ].values
 
-        for prec_stn, evap_stn in unique_combinations:
+        for stn_rd, stn_ev24 in unique_combinations:
             # get locations with the same prec and evap station
-            loc_sel = locations.loc[
-                (locations["prec_point"] == prec_stn)
-                & (locations["evap_point"] == evap_stn)
-            ]
+            mask = (locations["stn_rd"] == stn_rd) & (locations["stn_ev24"] == stn_ev24)
+            loc_sel = locations.loc[mask]
 
             # calculate recharge time series
-            prec = oc_knmi_prec.loc[prec_stn, "obs"]["RD"].resample("D").nearest()
-            evap = oc_knmi_evap.loc[evap_stn, "obs"]["EV24"].resample("D").nearest()
+            index = oc_knmi_prec.index[oc_knmi_prec.station == stn_rd][0]
+            prec = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
+            index = oc_knmi_evap.index[oc_knmi_evap.station == stn_rd][0]
+            evap = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
             ts = (prec - evap).dropna()
             ts.name = f"{prec.name}-{evap.name}"
 
             _add_ts_to_ds(ts, loc_sel, "recharge", ds_out)
 
     elif method == "separate":
         ds_out["recharge"] = dims, np.zeros(shape)
-        for stn in locations["prec_point"].unique():
-            ts = oc_knmi_prec.loc[stn, "obs"]["RD"]
-            loc_sel = locations.loc[(locations["prec_point"] == stn)]
+        for stn in locations["stn_rd"].unique():
+            index = oc_knmi_prec.index[oc_knmi_prec.station == stn][0]
+            ts = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
+            loc_sel = locations.loc[(locations["stn_rd"] == stn)]
             _add_ts_to_ds(ts, loc_sel, "recharge", ds_out)
+
         ds_out["evaporation"] = dims, np.zeros(shape)
-        for stn in locations["evap_point"].unique():
-            ts = oc_knmi_evap.loc[stn, "obs"]["EV24"]
-            loc_sel = locations.loc[(locations["evap_point"] == stn)]
+        for stn in locations["stn_ev24"].unique():
+            index = oc_knmi_evap.index[oc_knmi_evap.station == stn][0]
+            ts = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
+            loc_sel = locations.loc[(locations["stn_ev24"] == stn)]
             _add_ts_to_ds(ts, loc_sel, "evaporation", ds_out)
     else:
         raise (Exception(f"Unknown method: {method}"))
@@ -225,7 +224,7 @@ def get_locations_structured(ds):
     return locations
 
 
-def get_knmi_at_locations(ds, start="2010", end=None):
+def get_knmi_at_locations(ds, start="2010", end=None, most_common_station=False):
     """get knmi data at the locations of the active grid cells in ds.
 
     Parameters
@@ -258,14 +257,27 @@ def get_knmi_at_locations(ds, start="2010", end=None):
         locations = get_locations_vertex(ds)
     else:
         raise ValueError("gridtype should be structured or vertex")
+    locations["stn_rd"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
+    locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="EV24")
+    if most_common_station:
+        if ds.gridtype == "structured":
+            raise (
+                Exception("Most_common_station not yet supported for structured grids")
+            )
+        locations["area"] = ds["area"].loc[locations.index]
+        locations["stn_rd"] = locations.groupby("stn_rd").sum()["area"].idxmax()
+        locations["stn_ev24"] = locations.groupby("stn_ev24").sum()["area"].idxmax()
+
+    stns_rd = locations["stn_rd"].unique()
+    stns_ev24 = locations["stn_ev24"].unique()
 
     # get knmi data stations closest to any grid cell
     oc_knmi_prec = hpd.ObsCollection.from_knmi(
-        locations=locations, starts=[start], ends=[end], meteo_vars=["RD"]
+        stns=stns_rd, starts=[start], ends=[end], meteo_vars=["RD"]
     )
 
     oc_knmi_evap = hpd.ObsCollection.from_knmi(
-        locations=locations, starts=[start], ends=[end], meteo_vars=["EV24"]
+        stns=stns_ev24, starts=[start], ends=[end], meteo_vars=["EV24"]
     )
 
     return locations, oc_knmi_prec, oc_knmi_evap