Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements gdf to da #290

Merged
merged 9 commits into from
Nov 17, 2023
165 changes: 130 additions & 35 deletions nlmod/dims/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -1142,11 +1142,12 @@ def gdf_to_data_array_struc(


def gdf_to_da(
gdf, ds, column, agg_method=None, fill_value=np.NaN, min_total_overlap=0.0
gdf, ds, column, agg_method=None, fill_value=np.NaN, min_total_overlap=0.0,
ix=None
):
"""Project vector data on a structured grid. Aggregate data if multiple
geometries are in a single cell. This method replaces
gdf_to_data_array_struc.
"""Project vector data on a grid. Aggregate data if multiple
geometries are in a single cell. Supports structured and vertex grids.
This method replaces gdf_to_data_array_struc.

Parameters
----------
Expand All @@ -1168,13 +1169,15 @@ def gdf_to_da(
min_total_overlap: float, optional
Only assign cells with a gdf-area larger than min_total_overlap * cell-area. The
default is 0.0
ix : GridIntersect, optional
If not provided it is computed from ds.

Returns
-------
da : xarray DataArray
The DataArray with the projected vector data.
"""
gdf_cellid = gdf_to_grid(gdf, ds)
gdf_cellid = gdf_to_grid(gdf, ds, ix=ix)
if min_total_overlap > 0:
gdf_cellid["area"] = gdf_cellid.area
area_sum = gdf_cellid[["cellid", "area"]].groupby("cellid").sum()
Expand Down Expand Up @@ -1204,9 +1207,15 @@ def gdf_to_da(
)
else:
gdf_agg.set_index(gdf_cellid.cellid.values, inplace=True)

da = util.get_da_from_da_ds(ds, dims=ds.top.dims, data=fill_value)
for ind, row in gdf_agg.iterrows():
da.values[ind] = row[column]

if ds.gridtype == "structured":
ixs, iys = zip(*gdf_agg.index.values)
da.values[ixs, iys] = gdf_agg[column]
elif ds.gridtype == "vertex":
da[gdf_agg.index] = gdf_agg[column]

return da


Expand Down Expand Up @@ -1258,8 +1267,7 @@ def _agg_max_area(gdf, col):


def _agg_area_weighted(gdf, col):
nanmask = gdf[col].isna()
aw = (gdf.area * gdf[col]).sum(skipna=True) / gdf.loc[~nanmask].area.sum()
aw = (gdf.area * gdf[col]).sum(skipna=True) / gdf.area.sum(skipna=True)
return aw


Expand Down Expand Up @@ -1331,7 +1339,8 @@ def aggregate_vector_per_cell(gdf, fields_methods, modelgrid=None):
fields (keys) in the Geodataframe with their aggregation method (items)
aggregation methods can be:
max, min, mean, sum, length_weighted (lines), max_length (lines),
area_weighted (polygon), max_area (polygon).
area_weighted (polygon), max_area (polygon), and functions supported by
pandas.*GroupBy.agg().
modelgrid : flopy Groundwater flow modelgrid
only necesary if one of the field methods is 'nearest'

Expand Down Expand Up @@ -1369,15 +1378,32 @@ def aggregate_vector_per_cell(gdf, fields_methods, modelgrid=None):
# aggregate data
gr = gdf.groupby(by="cellid")
celldata = pd.DataFrame(index=gr.groups.keys())
for cid, group in tqdm(gr, desc="Aggregate vector data"):
agg_dic = _get_aggregates_values(group, fields_methods, modelgrid)
for key, item in agg_dic.items():
celldata.loc[cid, key] = item

for field, method in fields_methods.items():
if method == "area_weighted":
gdf_copy = gdf.copy()
gdf_copy["area_times_field"] = gdf_copy["area"] * gdf_copy[field]

# skipna is not implemented by groupby therefore we use min_count=1
celldata[field] = (
gdf_copy.groupby(by="cellid")["area_times_field"].sum(min_count=1) /
gdf_copy.groupby(by="cellid")["area"].sum(min_count=1)
)

elif method in ("nearest", "length_weighted", "max_length", "max_area", "center_grid"):
for cid, group in tqdm(gr, desc="Aggregate vector data"):
agg_dic = _get_aggregates_values(group, {field: method}, modelgrid)

for key, item in agg_dic.items():
celldata.loc[cid, key] = item

else:
celldata[field] = gr[field].agg(method)

return celldata


def gdf_to_bool_da(gdf, ds):
def gdf_to_bool_da(gdf, ds, ix=None, buffer=0., **kwargs):
"""convert a GeoDataFrame with polygon geometries into a data array
corresponding to the modelgrid in which each cell is 1 (True) if one or
more geometries are (partly) in that cell.
Expand All @@ -1388,6 +1414,72 @@ def gdf_to_bool_da(gdf, ds):
shapes that will be rasterised.
ds : xr.DataSet
xarray with model data
ix : flopy.utils.GridIntersect, optional
If not provided it is computed from ds.
buffer : float, optional
buffer around the geometries. The default is 0.
**kwargs : keyword arguments
keyword arguments are passed to the intersect_*-methods.

Returns
-------
da : xr.DataArray
True if polygon is in cell, False otherwise. Grid dimensions according to ds.
"""
da = gdf_to_count_da(gdf, ds, ix=ix, buffer=buffer, **kwargs) > 0
return da


def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None, ix=None, buffer=0., **kwargs):
"""convert a GeoDataFrame with polygon geometries into a model dataset with
a data_array named 'da_name' in which each cell is 1 (True) if one or more
geometries are (partly) in that cell.

Parameters
----------
gdf : geopandas.GeoDataFrame
polygon shapes with surface water.
ds : xr.DataSet
xarray with model data
da_name : str
The name of the variable with boolean data in the ds_out
keep_coords : tuple or None, optional
the coordinates in ds the you want keep in your empty ds. If None all
coordinates are kept from original ds. The default is None.
ix : flopy.utils.GridIntersect, optional
If not provided it is computed from ds.
buffer : float, optional
buffer around the geometries. The default is 0.
**kwargs : keyword arguments
keyword arguments are passed to the intersect_*-methods.

Returns
-------
ds_out : xr.Dataset
Dataset with a single DataArray, this DataArray is 1 if polygon is in
cell, 0 otherwise. Grid dimensions according to ds and mfgrid.
"""
ds_out = util.get_ds_empty(ds, keep_coords=keep_coords)
ds_out[da_name] = gdf_to_bool_da(gdf, ds, ix=ix, buffer=buffer, **kwargs)

return ds_out


def gdf_to_count_da(gdf, ds, ix=None, buffer=0., **kwargs):
"""Counts in how many polygons a coordinate of ds appears.

Parameters
----------
gdf : geopandas.GeoDataFrame or shapely.geometry
shapes that will be rasterised.
ds : xr.DataSet
xarray with model data
ix : flopy.utils.GridIntersect, optional
If not provided it is computed from ds.
buffer : float, optional
buffer around the geometries. The default is 0.
**kwargs : keyword arguments
keyword arguments are passed to the intersect_*-methods.

Returns
-------
Expand All @@ -1399,10 +1491,10 @@ def gdf_to_bool_da(gdf, ds):
affine = get_affine_world_to_mod(ds)
gdf = affine_transform_gdf(gdf, affine)

modelgrid = modelgrid_from_ds(ds)

# build list of gridcells
ix = GridIntersect(modelgrid, method="vertex")
if ix is None:
modelgrid = modelgrid_from_ds(ds)
ix = GridIntersect(modelgrid, method="vertex")

if ds.gridtype == "structured":
da = util.get_da_from_da_ds(ds, dims=("y", "x"), data=0)
Expand All @@ -1417,28 +1509,25 @@ def gdf_to_bool_da(gdf, ds):
geoms = [gdf]

for geom in geoms:
cids = ix.intersects(geom)["cellids"]
if buffer > 0.:
cids = ix.intersects(geom.buffer(buffer), **kwargs)["cellids"]
else:
cids = ix.intersects(geom, **kwargs)["cellids"]

if len(cids) == 0:
continue

if ds.gridtype == "structured":
ncol = modelgrid.ncol
for cid in cids:
if isinstance(cid, tuple):
i, j = cid
else:
# TODO: temporary fix until flopy intersect on structured
# grid returns row, col again.
i = int((cid) / ncol)
j = cid - i * ncol
da[i, j] = 1
ixs, iys = zip(*cids)
da.values[ixs, iys] += 1
elif ds.gridtype == "vertex":
da[cids.astype(int)] = 1
da[cids.astype(int)] += 1

return da


def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None):
"""convert a GeoDataFrame with polygon geometries into a model dataset with
a data_array named 'da_name' in which each cell is 1 (True) if one or more
geometries are (partly) in that cell.
def gdf_to_count_ds(gdf, ds, da_name, keep_coords=None, ix=None, buffer=0., **kwargs):
"""Counts in how many polygons a coordinate of ds appears.

Parameters
----------
Expand All @@ -1451,6 +1540,12 @@ def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None):
keep_coords : tuple or None, optional
the coordinates in ds the you want keep in your empty ds. If None all
coordinates are kept from original ds. The default is None.
ix : flopy.utils.GridIntersect, optional
If not provided it is computed from ds.
buffer : float, optional
buffer around the geometries. The default is 0.
**kwargs : keyword arguments
keyword arguments are passed to the intersect_*-methods.

Returns
-------
Expand All @@ -1459,7 +1554,7 @@ def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None):
cell, 0 otherwise. Grid dimensions according to ds and mfgrid.
"""
ds_out = util.get_ds_empty(ds, keep_coords=keep_coords)
ds_out[da_name] = gdf_to_bool_da(gdf, ds)
ds_out[da_name] = gdf_to_count_da(gdf, ds, ix=ix, buffer=buffer, **kwargs)

return ds_out

Expand Down
Loading