diff --git a/nlmod/dims/grid.py b/nlmod/dims/grid.py index 2c68192b..42e4e3f3 100644 --- a/nlmod/dims/grid.py +++ b/nlmod/dims/grid.py @@ -1142,11 +1142,12 @@ def gdf_to_data_array_struc( def gdf_to_da( - gdf, ds, column, agg_method=None, fill_value=np.NaN, min_total_overlap=0.0 + gdf, ds, column, agg_method=None, fill_value=np.NaN, min_total_overlap=0.0, + ix=None ): - """Project vector data on a structured grid. Aggregate data if multiple - geometries are in a single cell. This method replaces - gdf_to_data_array_struc. + """Project vector data on a grid. Aggregate data if multiple + geometries are in a single cell. Supports structured and vertex grids. + This method replaces gdf_to_data_array_struc. Parameters ---------- @@ -1168,13 +1169,15 @@ def gdf_to_da( min_total_overlap: float, optional Only assign cells with a gdf-area larger than min_total_overlap * cell-area. The default is 0.0 + ix : GridIntersect, optional + If not provided it is computed from ds. Returns ------- da : xarray DataArray The DataArray with the projected vector data. """ - gdf_cellid = gdf_to_grid(gdf, ds) + gdf_cellid = gdf_to_grid(gdf, ds, ix=ix) if min_total_overlap > 0: gdf_cellid["area"] = gdf_cellid.area area_sum = gdf_cellid[["cellid", "area"]].groupby("cellid").sum() @@ -1204,9 +1207,15 @@ def gdf_to_da( ) else: gdf_agg.set_index(gdf_cellid.cellid.values, inplace=True) + da = util.get_da_from_da_ds(ds, dims=ds.top.dims, data=fill_value) - for ind, row in gdf_agg.iterrows(): - da.values[ind] = row[column] + + if ds.gridtype == "structured": + ixs, iys = zip(*gdf_agg.index.values) + da.values[ixs, iys] = gdf_agg[column] + elif ds.gridtype == "vertex": + da[gdf_agg.index] = gdf_agg[column] + return da @@ -1258,8 +1267,7 @@ def _agg_max_area(gdf, col): def _agg_area_weighted(gdf, col): - nanmask = gdf[col].isna() - aw = (gdf.area * gdf[col]).sum(skipna=True) / gdf.loc[~nanmask].area.sum() + aw = (gdf.area * gdf[col]).sum(skipna=True) / gdf.area.sum(skipna=True) return aw @@ -1331,7 +1339,8 @@ def aggregate_vector_per_cell(gdf, fields_methods, modelgrid=None): fields (keys) in the Geodataframe with their aggregation method (items) aggregation methods can be: max, min, mean, sum, length_weighted (lines), max_length (lines), - area_weighted (polygon), max_area (polygon). + area_weighted (polygon), max_area (polygon), and functions supported by + pandas.*GroupBy.agg(). modelgrid : flopy Groundwater flow modelgrid only necesary if one of the field methods is 'nearest' @@ -1369,15 +1378,32 @@ def aggregate_vector_per_cell(gdf, fields_methods, modelgrid=None): # aggregate data gr = gdf.groupby(by="cellid") celldata = pd.DataFrame(index=gr.groups.keys()) - for cid, group in tqdm(gr, desc="Aggregate vector data"): - agg_dic = _get_aggregates_values(group, fields_methods, modelgrid) - for key, item in agg_dic.items(): - celldata.loc[cid, key] = item + + for field, method in fields_methods.items(): + if method == "area_weighted": + gdf_copy = gdf.copy() + gdf_copy["area_times_field"] = gdf_copy["area"] * gdf_copy[field] + + # skipna is not implemented by groupby therefore we use min_count=1 + celldata[field] = ( + gdf_copy.groupby(by="cellid")["area_times_field"].sum(min_count=1) / + gdf_copy.groupby(by="cellid")["area"].sum(min_count=1) + ) + + elif method in ("nearest", "length_weighted", "max_length", "max_area", "center_grid"): + for cid, group in tqdm(gr, desc="Aggregate vector data"): + agg_dic = _get_aggregates_values(group, {field: method}, modelgrid) + + for key, item in agg_dic.items(): + celldata.loc[cid, key] = item + + else: + celldata[field] = gr[field].agg(method) return celldata -def gdf_to_bool_da(gdf, ds): +def gdf_to_bool_da(gdf, ds, ix=None, buffer=0., **kwargs): """convert a GeoDataFrame with polygon geometries into a data array corresponding to the modelgrid in which each cell is 1 (True) if one or more geometries are (partly) in that cell. @@ -1388,6 +1414,72 @@ def gdf_to_bool_da(gdf, ds): shapes that will be rasterised. ds : xr.DataSet xarray with model data + ix : flopy.utils.GridIntersect, optional + If not provided it is computed from ds. + buffer : float, optional + buffer around the geometries. The default is 0. + **kwargs : keyword arguments + keyword arguments are passed to the intersect_*-methods. + + Returns + ------- + da : xr.DataArray + True if polygon is in cell, False otherwise. Grid dimensions according to ds. + """ + da = gdf_to_count_da(gdf, ds, ix=ix, buffer=buffer, **kwargs) > 0 + return da + + +def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None, ix=None, buffer=0., **kwargs): + """convert a GeoDataFrame with polygon geometries into a model dataset with + a data_array named 'da_name' in which each cell is 1 (True) if one or more + geometries are (partly) in that cell. + + Parameters + ---------- + gdf : geopandas.GeoDataFrame + polygon shapes with surface water. + ds : xr.DataSet + xarray with model data + da_name : str + The name of the variable with boolean data in the ds_out + keep_coords : tuple or None, optional + the coordinates in ds the you want keep in your empty ds. If None all + coordinates are kept from original ds. The default is None. + ix : flopy.utils.GridIntersect, optional + If not provided it is computed from ds. + buffer : float, optional + buffer around the geometries. The default is 0. + **kwargs : keyword arguments + keyword arguments are passed to the intersect_*-methods. + + Returns + ------- + ds_out : xr.Dataset + Dataset with a single DataArray, this DataArray is 1 if polygon is in + cell, 0 otherwise. Grid dimensions according to ds and mfgrid. + """ + ds_out = util.get_ds_empty(ds, keep_coords=keep_coords) + ds_out[da_name] = gdf_to_bool_da(gdf, ds, ix=ix, buffer=buffer, **kwargs) + + return ds_out + + +def gdf_to_count_da(gdf, ds, ix=None, buffer=0., **kwargs): + """Counts in how many polygons a coordinate of ds appears. + + Parameters + ---------- + gdf : geopandas.GeoDataFrame or shapely.geometry + shapes that will be rasterised. + ds : xr.DataSet + xarray with model data + ix : flopy.utils.GridIntersect, optional + If not provided it is computed from ds. + buffer : float, optional + buffer around the geometries. The default is 0. + **kwargs : keyword arguments + keyword arguments are passed to the intersect_*-methods. Returns ------- @@ -1399,10 +1491,10 @@ def gdf_to_bool_da(gdf, ds): affine = get_affine_world_to_mod(ds) gdf = affine_transform_gdf(gdf, affine) - modelgrid = modelgrid_from_ds(ds) - # build list of gridcells - ix = GridIntersect(modelgrid, method="vertex") + if ix is None: + modelgrid = modelgrid_from_ds(ds) + ix = GridIntersect(modelgrid, method="vertex") if ds.gridtype == "structured": da = util.get_da_from_da_ds(ds, dims=("y", "x"), data=0) @@ -1417,28 +1509,25 @@ def gdf_to_bool_da(gdf, ds): geoms = [gdf] for geom in geoms: - cids = ix.intersects(geom)["cellids"] + if buffer > 0.: + cids = ix.intersects(geom.buffer(buffer), **kwargs)["cellids"] + else: + cids = ix.intersects(geom, **kwargs)["cellids"] + + if len(cids) == 0: + continue + if ds.gridtype == "structured": - ncol = modelgrid.ncol - for cid in cids: - if isinstance(cid, tuple): - i, j = cid - else: - # TODO: temporary fix until flopy intersect on structured - # grid returns row, col again. - i = int((cid) / ncol) - j = cid - i * ncol - da[i, j] = 1 + ixs, iys = zip(*cids) + da.values[ixs, iys] += 1 elif ds.gridtype == "vertex": - da[cids.astype(int)] = 1 + da[cids.astype(int)] += 1 return da -def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None): - """convert a GeoDataFrame with polygon geometries into a model dataset with - a data_array named 'da_name' in which each cell is 1 (True) if one or more - geometries are (partly) in that cell. +def gdf_to_count_ds(gdf, ds, da_name, keep_coords=None, ix=None, buffer=0., **kwargs): + """Counts in how many polygons a coordinate of ds appears. Parameters ---------- @@ -1451,6 +1540,12 @@ def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None): keep_coords : tuple or None, optional the coordinates in ds the you want keep in your empty ds. If None all coordinates are kept from original ds. The default is None. + ix : flopy.utils.GridIntersect, optional + If not provided it is computed from ds. + buffer : float, optional + buffer around the geometries. The default is 0. + **kwargs : keyword arguments + keyword arguments are passed to the intersect_*-methods. Returns ------- @@ -1459,7 +1554,7 @@ def gdf_to_bool_ds(gdf, ds, da_name, keep_coords=None): cell, 0 otherwise. Grid dimensions according to ds and mfgrid. """ ds_out = util.get_ds_empty(ds, keep_coords=keep_coords) - ds_out[da_name] = gdf_to_bool_da(gdf, ds) + ds_out[da_name] = gdf_to_count_da(gdf, ds, ix=ix, buffer=buffer, **kwargs) return ds_out