Skip to content

Commit

Permalink
Add option to only use most common station
Browse files Browse the repository at this point in the history
Slow determination of stations (locations.geo.get_nearest_point) is removed
  • Loading branch information
rubencalje committed May 8, 2023
1 parent 69cdafd commit 07aa8f3
Showing 1 changed file with 40 additions and 28 deletions.
68 changes: 40 additions & 28 deletions nlmod/read/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

import hydropandas as hpd
from hydropandas.io import knmi as hpd_knmi
import numpy as np
import pandas as pd

Expand All @@ -13,7 +14,7 @@


@cache.cache_netcdf
def get_recharge(ds, method="linear"):
def get_recharge(ds, method="linear", most_common_station=False):
"""add multiple recharge packages to the groundwater flow model with knmi
data by following these steps:
Expand Down Expand Up @@ -41,6 +42,9 @@ def get_recharge(ds, method="linear"):
If 'linear', calculate recharge by subtracting evaporation from precipitation.
If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
The defaults is 'linear'.
most_common_station : bool, optional
When True, only download data from the station that is most common in the model
area. The default is False
Returns
-------
Expand Down Expand Up @@ -71,50 +75,45 @@ def get_recharge(ds, method="linear"):
shape = [len(ds_out[dim]) for dim in dims]

locations, oc_knmi_prec, oc_knmi_evap = get_knmi_at_locations(
ds, start=start, end=end
)

# add closest precipitation and evaporation measurement station to each cell
locations[["prec_point", "distance"]] = locations.geo.get_nearest_point(
oc_knmi_prec
)
locations[["evap_point", "distance"]] = locations.geo.get_nearest_point(
oc_knmi_evap
ds, start=start, end=end, most_common_station=most_common_station
)

if method in ["linear"]:
ds_out["recharge"] = dims, np.zeros(shape)

# find unique combination of precipitation and evaporation station
unique_combinations = locations.drop_duplicates(["prec_point", "evap_point"])[
["prec_point", "evap_point"]
unique_combinations = locations.drop_duplicates(["stn_rd", "stn_ev24"])[
["stn_rd", "stn_ev24"]
].values

for prec_stn, evap_stn in unique_combinations:
for stn_rd, stn_ev24 in unique_combinations:
# get locations with the same prec and evap station
loc_sel = locations.loc[
(locations["prec_point"] == prec_stn)
& (locations["evap_point"] == evap_stn)
]
mask = (locations["stn_rd"] == stn_rd) & (locations["stn_ev24"] == stn_ev24)
loc_sel = locations.loc[mask]

# calculate recharge time series
prec = oc_knmi_prec.loc[prec_stn, "obs"]["RD"].resample("D").nearest()
evap = oc_knmi_evap.loc[evap_stn, "obs"]["EV24"].resample("D").nearest()
index = oc_knmi_prec.index[oc_knmi_prec.station == stn_rd][0]
prec = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
index = oc_knmi_evap.index[oc_knmi_evap.station == stn_rd][0]
evap = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
ts = (prec - evap).dropna()
ts.name = f"{prec.name}-{evap.name}"

_add_ts_to_ds(ts, loc_sel, "recharge", ds_out)

elif method == "separate":
ds_out["recharge"] = dims, np.zeros(shape)
for stn in locations["prec_point"].unique():
ts = oc_knmi_prec.loc[stn, "obs"]["RD"]
loc_sel = locations.loc[(locations["prec_point"] == stn)]
for stn in locations["stn_rd"].unique():
index = oc_knmi_prec.index[oc_knmi_prec.station == stn][0]
ts = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_rd"] == stn)]
_add_ts_to_ds(ts, loc_sel, "recharge", ds_out)

ds_out["evaporation"] = dims, np.zeros(shape)
for stn in locations["evap_point"].unique():
ts = oc_knmi_evap.loc[stn, "obs"]["EV24"]
loc_sel = locations.loc[(locations["evap_point"] == stn)]
for stn in locations["stn_ev24"].unique():
index = oc_knmi_evap.index[oc_knmi_evap.station == stn][0]
ts = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_ev24"] == stn)]
_add_ts_to_ds(ts, loc_sel, "evaporation", ds_out)
else:
raise (Exception(f"Unknown method: {method}"))
Expand Down Expand Up @@ -225,7 +224,7 @@ def get_locations_structured(ds):
return locations


def get_knmi_at_locations(ds, start="2010", end=None):
def get_knmi_at_locations(ds, start="2010", end=None, most_common_station=False):
"""get knmi data at the locations of the active grid cells in ds.
Parameters
Expand Down Expand Up @@ -258,14 +257,27 @@ def get_knmi_at_locations(ds, start="2010", end=None):
locations = get_locations_vertex(ds)
else:
raise ValueError("gridtype should be structured or vertex")
locations["stn_rd"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="EV24")
if most_common_station:
if ds.gridtype == "structured":
raise (
Exception("Most_common_station not yet supported for structured grids")
)
locations["area"] = ds["area"].loc[locations.index]
locations["stn_rd"] = locations.groupby("stn_rd").sum()["area"].idxmax()
locations["stn_ev24"] = locations.groupby("stn_ev24").sum()["area"].idxmax()

stns_rd = locations["stn_rd"].unique()
stns_ev24 = locations["stn_ev24"].unique()

# get knmi data stations closest to any grid cell
oc_knmi_prec = hpd.ObsCollection.from_knmi(
locations=locations, starts=[start], ends=[end], meteo_vars=["RD"]
stns=stns_rd, starts=[start], ends=[end], meteo_vars=["RD"]
)

oc_knmi_evap = hpd.ObsCollection.from_knmi(
locations=locations, starts=[start], ends=[end], meteo_vars=["EV24"]
stns=stns_ev24, starts=[start], ends=[end], meteo_vars=["EV24"]
)

return locations, oc_knmi_prec, oc_knmi_evap

0 comments on commit 07aa8f3

Please sign in to comment.