Skip to content

Commit

Permalink
Knmi stations (#174)
Browse files Browse the repository at this point in the history
* Add option to only use most common station

Slow determination of stations (locations.geo.get_nearest_point) is removed

* solve minor bug and add test for most_common_station=True

* Set gridgen files in a separate directory. Solves issue #158

* minor changes

* Make most_common_station work with structured grids as well

* Create gridgen directory when it does not exist

* Solve fiona.errors.DriverError:  not recognized as a supported file format.

* Remove rasterstats as a dependency for tests and docs

* Codacy stuff
  • Loading branch information
rubencalje authored and dbrakenhoff committed May 9, 2023
1 parent ad39be6 commit f1c920e
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 42 deletions.
2 changes: 1 addition & 1 deletion docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,4 @@ potential solutions.
On top of that there are some optional dependencies, only needed (and imported) in a single method:

- bottleneck (used in calculate_gxg)
- geocube (used in add_min_ahn_to_gdf)
- geocube (used in add_min_ahn_to_gdf)
1 change: 0 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ Ipython
ipykernel
nbsphinx
netCDF4==1.5.7
rasterstats
geocube
bottleneck
contextily
2 changes: 1 addition & 1 deletion nlmod/dims/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import logging
import os
import warnings

import os
import flopy
import geopandas as gpd
import numpy as np
Expand Down
4 changes: 4 additions & 0 deletions nlmod/dims/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ def calculate_thickness(ds, top="top", bot="botm"):
else:
raise ValueError("2d top should have same last dimension as bot")
if isinstance(ds[bot], xr.DataArray):
if hasattr(ds[bot], "long_name"):
thickness.attrs["long_name"] = "thickness"
if hasattr(ds[bot], "standard_name"):
thickness.attrs["standard_name"] = "thickness_of_layer"
if hasattr(ds[bot], "units"):
if ds[bot].units == "mNAP":
thickness.attrs["units"] = "m"
Expand Down
9 changes: 5 additions & 4 deletions nlmod/dims/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,11 +375,12 @@ def vertex_da_to_ds(da, ds, method="nearest"):
Parameters
----------
da : xaray.DataArray
A vertex DataArray. When the DataArray does not have 'icell2d' as a
dimension, the original DataArray is retured. The DataArray da can
contain other dimensions as well (for example 'layer' or time'' ).
A vertex DataArray. When the DataArray does not have 'icell2d' as a dimension,
the original DataArray is returned. The DataArray da can contain other dimensions
as well (for example 'layer' or 'time').
ds : xarray.Dataset
The model dataset with coordinates x and y.
The model dataset to which the DataArray needs to be resampled, with coordinates
x and y.
method : str, optional
The interpolation method, see griddata. The default is "nearest".
Expand Down
72 changes: 43 additions & 29 deletions nlmod/read/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

import hydropandas as hpd
from hydropandas.io import knmi as hpd_knmi
import numpy as np
import pandas as pd

Expand All @@ -13,7 +14,7 @@


@cache.cache_netcdf
def get_recharge(ds, method="linear"):
def get_recharge(ds, method="linear", most_common_station=False):
"""add multiple recharge packages to the groundwater flow model with knmi
data by following these steps:
Expand Down Expand Up @@ -41,6 +42,9 @@ def get_recharge(ds, method="linear"):
If 'linear', calculate recharge by subtracting evaporation from precipitation.
If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
The defaults is 'linear'.
most_common_station : bool, optional
When True, only download data from the station that is most common in the model
area. The default is False.
Returns
-------
Expand All @@ -51,7 +55,7 @@ def get_recharge(ds, method="linear"):
if "time" not in ds:
raise (
AttributeError(
"'Dataset' object has no attribute 'time'. "
"'Dataset' object has no 'time' dimension. "
"Please run nlmod.time.set_ds_time()"
)
)
Expand All @@ -71,50 +75,45 @@ def get_recharge(ds, method="linear"):
shape = [len(ds_out[dim]) for dim in dims]

locations, oc_knmi_prec, oc_knmi_evap = get_knmi_at_locations(
ds, start=start, end=end
)

# add closest precipitation and evaporation measurement station to each cell
locations[["prec_point", "distance"]] = locations.geo.get_nearest_point(
oc_knmi_prec
)
locations[["evap_point", "distance"]] = locations.geo.get_nearest_point(
oc_knmi_evap
ds, start=start, end=end, most_common_station=most_common_station
)

if method in ["linear"]:
ds_out["recharge"] = dims, np.zeros(shape)

# find unique combination of precipitation and evaporation station
unique_combinations = locations.drop_duplicates(["prec_point", "evap_point"])[
["prec_point", "evap_point"]
unique_combinations = locations.drop_duplicates(["stn_rd", "stn_ev24"])[
["stn_rd", "stn_ev24"]
].values

for prec_stn, evap_stn in unique_combinations:
for stn_rd, stn_ev24 in unique_combinations:
# get locations with the same prec and evap station
loc_sel = locations.loc[
(locations["prec_point"] == prec_stn)
& (locations["evap_point"] == evap_stn)
]
mask = (locations["stn_rd"] == stn_rd) & (locations["stn_ev24"] == stn_ev24)
loc_sel = locations.loc[mask]

# calculate recharge time series
prec = oc_knmi_prec.loc[prec_stn, "obs"]["RD"].resample("D").nearest()
evap = oc_knmi_evap.loc[evap_stn, "obs"]["EV24"].resample("D").nearest()
index = oc_knmi_prec.index[oc_knmi_prec.station == stn_rd][0]
prec = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
index = oc_knmi_evap.index[oc_knmi_evap.station == stn_ev24][0]
evap = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
ts = (prec - evap).dropna()
ts.name = f"{prec.name}-{evap.name}"

_add_ts_to_ds(ts, loc_sel, "recharge", ds_out)

elif method == "separate":
ds_out["recharge"] = dims, np.zeros(shape)
for stn in locations["prec_point"].unique():
ts = oc_knmi_prec.loc[stn, "obs"]["RD"]
loc_sel = locations.loc[(locations["prec_point"] == stn)]
for stn in locations["stn_rd"].unique():
index = oc_knmi_prec.index[oc_knmi_prec.station == stn][0]
ts = oc_knmi_prec.loc[index, "obs"]["RD"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_rd"] == stn)]
_add_ts_to_ds(ts, loc_sel, "recharge", ds_out)

ds_out["evaporation"] = dims, np.zeros(shape)
for stn in locations["evap_point"].unique():
ts = oc_knmi_evap.loc[stn, "obs"]["EV24"]
loc_sel = locations.loc[(locations["evap_point"] == stn)]
for stn in locations["stn_ev24"].unique():
index = oc_knmi_evap.index[oc_knmi_evap.station == stn][0]
ts = oc_knmi_evap.loc[index, "obs"]["EV24"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_ev24"] == stn)]
_add_ts_to_ds(ts, loc_sel, "evaporation", ds_out)
else:
raise (Exception(f"Unknown method: {method}"))
Expand Down Expand Up @@ -225,7 +224,7 @@ def get_locations_structured(ds):
return locations


def get_knmi_at_locations(ds, start="2010", end=None):
def get_knmi_at_locations(ds, start="2010", end=None, most_common_station=False):
"""get knmi data at the locations of the active grid cells in ds.
Parameters
Expand Down Expand Up @@ -258,14 +257,29 @@ def get_knmi_at_locations(ds, start="2010", end=None):
locations = get_locations_vertex(ds)
else:
raise ValueError("gridtype should be structured or vertex")
locations["stn_rd"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="EV24")
if most_common_station:
if ds.gridtype == "structured":
# set the most common station to all locations
locations["stn_rd"] = locations["stn_rd"].value_counts().idxmax()
locations["stn_ev24"] = locations["stn_ev24"].value_counts().idxmax()
else:
# set the station with the largest area to all locations
locations["area"] = ds["area"].loc[locations.index]
locations["stn_rd"] = locations.groupby("stn_rd").sum()["area"].idxmax()
locations["stn_ev24"] = locations.groupby("stn_ev24").sum()["area"].idxmax()

stns_rd = locations["stn_rd"].unique()
stns_ev24 = locations["stn_ev24"].unique()

# get knmi data stations closest to any grid cell
oc_knmi_prec = hpd.ObsCollection.from_knmi(
locations=locations, starts=[start], ends=[end], meteo_vars=["RD"]
stns=stns_rd, starts=[start], ends=[end], meteo_vars=["RD"]
)

oc_knmi_evap = hpd.ObsCollection.from_knmi(
locations=locations, starts=[start], ends=[end], meteo_vars=["EV24"]
stns=stns_ev24, starts=[start], ends=[end], meteo_vars=["EV24"]
)

return locations, oc_knmi_prec, oc_knmi_evap
19 changes: 13 additions & 6 deletions nlmod/read/webservices.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import xml.etree.ElementTree as ET

from io import BytesIO
import geopandas as gpd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -138,6 +138,7 @@ def wfs(
paged=True,
max_record_count=None,
driver="GML",
timeout=120,
):
"""Download data from a wfs server."""
params = dict(version=version, request="GetFeature")
Expand Down Expand Up @@ -191,7 +192,9 @@ def add_constrains(elem, constraints):

# get the number of features
params["resultType"] = "hits"
r = requests.get(url, params=params, timeout=120)
r = requests.get(url, params=params, timeout=timeout)
if not r.ok:
raise (Exception(f"Request not successful: {r.url}"))
params.pop("resultType")
root = ET.fromstring(r.text)
if "ExceptionReport" in root.tag:
Expand All @@ -209,13 +212,17 @@ def add_constrains(elem, constraints):
params["count"] = max_record_count
for ip in range(int(np.ceil(n / max_record_count))):
params["startindex"] = ip * max_record_count
req_url = requests.Request("GET", url, params=params).prepare().url
gdfs.append(gpd.read_file(req_url, driver=driver))
r = requests.get(url, params=params, timeout=timeout)
if not r.ok:
raise (Exception(f"Request not successful: {r.url}"))
gdfs.append(gpd.read_file(BytesIO(r.content), driver=driver))
gdf = pd.concat(gdfs).reset_index(drop=True)
else:
# download all features in one go
req_url = requests.Request("GET", url, params=params).prepare().url
gdf = gpd.read_file(req_url, driver=driver)
r = requests.get(url, params=params, timeout=timeout)
if not r.ok:
raise (Exception(f"Request not successful: {r.url}"))
gdf = gpd.read_file(BytesIO(r.content), driver=driver)

return gdf

Expand Down
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
nbconvert>=6.4.5
netCDF4==1.5.7
geocube
gdown
bottleneck
contextily
8 changes: 8 additions & 0 deletions tests/test_005_external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ def test_get_recharge():
ds.update(nlmod.read.knmi.get_recharge(ds))


def test_get_recharge_most_common():
    """Test get_recharge with most_common_station=True on a model with sea.

    Fixes the 'reacharge' typo in the test name; pytest discovers tests by the
    ``test_`` prefix, so the rename is safe.
    """
    # model with sea
    ds = test_001_model.get_ds_from_cache("basic_sea_model")

    # add knmi recharge to the model dataset, downloading data only from the
    # station that is most common in the model area (most_common_station=True)
    ds.update(nlmod.read.knmi.get_recharge(ds, most_common_station=True))


def test_get_recharge_steady_state():
# model with sea
ds = test_001_model.get_ds_from_cache("basic_sea_model")
Expand Down

0 comments on commit f1c920e

Please sign in to comment.