diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2cc92c78ac8..b3c8bc53520 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,6 +56,11 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. +- :py:func:`coarsen` and :py:func:`rolling` now respect + ``xr.set_options(keep_attrs=True)`` to preserve attributes. :py:meth:`Dataset.coarsen` + and :py:meth:`Dataset.rolling` accept a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, + :pull:`3801`) By `Andrew Thomas `_. + - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. diff --git a/xarray/core/common.py b/xarray/core/common.py index 582ae310061..e3739d6d039 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -753,6 +753,7 @@ def rolling( dim: Mapping[Hashable, int] = None, min_periods: int = None, center: bool = False, + keep_attrs: bool = None, **window_kwargs: int, ): """ @@ -769,6 +770,10 @@ def rolling( setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **window_kwargs : optional The keyword arguments form of ``dim``. One of dim or window_kwargs must be provided. 
@@ -810,8 +815,13 @@ def rolling( core.rolling.DataArrayRolling core.rolling.DatasetRolling """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + dim = either_dict_or_kwargs(dim, window_kwargs, "rolling") - return self._rolling_cls(self, dim, min_periods=min_periods, center=center) + return self._rolling_cls( + self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs + ) def rolling_exp( self, @@ -859,6 +869,7 @@ def coarsen( boundary: str = "exact", side: Union[str, Mapping[Hashable, str]] = "left", coord_func: str = "mean", + keep_attrs: bool = None, **window_kwargs: int, ): """ @@ -879,8 +890,12 @@ def coarsen( multiple of the window size. If 'trim', the excess entries are dropped. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' - coord_func : function (name) that is applied to the coordintes, + coord_func : function (name) that is applied to the coordinates, or a mapping from coordinate name to function (name). + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. Returns ------- @@ -915,9 +930,17 @@ def coarsen( core.rolling.DataArrayCoarsen core.rolling.DatasetCoarsen """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen") return self._coarsen_cls( - self, dim, boundary=boundary, side=side, coord_func=coord_func + self, + dim, + boundary=boundary, + side=side, + coord_func=coord_func, + keep_attrs=keep_attrs, ) def resample( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index ea6d72b2e03..61178cfb15f 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -7,6 +7,7 @@ from . 
import dtypes, duck_array_ops, utils from .dask_array_ops import dask_rolling_wrapper from .ops import inject_reduce_methods +from .options import _get_keep_attrs from .pycompat import dask_array_type try: @@ -42,10 +43,10 @@ class Rolling: DataArray.rolling """ - __slots__ = ("obj", "window", "min_periods", "center", "dim") - _attributes = ("window", "min_periods", "center", "dim") + __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs") + _attributes = ("window", "min_periods", "center", "dim", "keep_attrs") - def __init__(self, obj, windows, min_periods=None, center=False): + def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ Moving window object. @@ -65,6 +66,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. Returns ------- @@ -89,6 +94,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): self.center = center self.dim = dim + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + self.keep_attrs = keep_attrs + @property def _min_periods(self): return self.min_periods if self.min_periods is not None else self.window @@ -143,7 +152,7 @@ def count(self): class DataArrayRolling(Rolling): __slots__ = ("window_labels",) - def __init__(self, obj, windows, min_periods=None, center=False): + def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ Moving window object for DataArray. You should use DataArray.rolling() method to construct this object @@ -165,6 +174,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): setting min_periods equal to the size of the window. 
center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. Returns ------- @@ -177,7 +190,11 @@ def __init__(self, obj, windows, min_periods=None, center=False): Dataset.rolling Dataset.groupby """ - super().__init__(obj, windows, min_periods=min_periods, center=center) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + super().__init__( + obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs + ) self.window_labels = self.obj[self.dim] @@ -374,7 +391,7 @@ def _numpy_or_bottleneck_reduce( class DatasetRolling(Rolling): __slots__ = ("rollings",) - def __init__(self, obj, windows, min_periods=None, center=False): + def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ Moving window object for Dataset. You should use Dataset.rolling() method to construct this object @@ -396,6 +413,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. 
Returns ------- @@ -408,7 +429,7 @@ def __init__(self, obj, windows, min_periods=None, center=False): Dataset.groupby DataArray.groupby """ - super().__init__(obj, windows, min_periods, center) + super().__init__(obj, windows, min_periods, center, keep_attrs) if self.dim not in self.obj.dims: raise KeyError(self.dim) # Keep each Rolling object as a dictionary @@ -416,7 +437,9 @@ def __init__(self, obj, windows, min_periods=None, center=False): for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on slf.dim if self.dim in da.dims: - self.rollings[key] = DataArrayRolling(da, windows, min_periods, center) + self.rollings[key] = DataArrayRolling( + da, windows, min_periods, center, keep_attrs + ) def _dataset_implementation(self, func, **kwargs): from .dataset import Dataset @@ -427,7 +450,8 @@ def _dataset_implementation(self, func, **kwargs): reduced[key] = func(self.rollings[key], **kwargs) else: reduced[key] = self.obj[key] - return Dataset(reduced, coords=self.obj.coords) + attrs = self.obj.attrs if self.keep_attrs else {} + return Dataset(reduced, coords=self.obj.coords, attrs=attrs) def reduce(self, func, **kwargs): """Reduce the items in this group by applying `func` along some @@ -466,7 +490,7 @@ def _numpy_or_bottleneck_reduce( **kwargs, ) - def construct(self, window_dim, stride=1, fill_value=dtypes.NA): + def construct(self, window_dim, stride=1, fill_value=dtypes.NA, keep_attrs=None): """ Convert this rolling object to xr.Dataset, where the window dimension is stacked as a new dimension @@ -487,6 +511,9 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): from .dataset import Dataset + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + dataset = {} for key, da in self.obj.data_vars.items(): if self.dim in da.dims: @@ -509,10 +536,18 @@ class Coarsen: DataArray.coarsen """ - __slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess") + __slots__ = ( + "obj", + 
"boundary", + "coord_func", + "windows", + "side", + "trim_excess", + "keep_attrs", + ) _attributes = ("windows", "side", "trim_excess") - def __init__(self, obj, windows, boundary, side, coord_func): + def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): """ Moving window object. @@ -541,6 +576,7 @@ def __init__(self, obj, windows, boundary, side, coord_func): self.windows = windows self.side = side self.boundary = boundary + self.keep_attrs = keep_attrs absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] if absent_dims: @@ -626,6 +662,11 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, **kwargs): from .dataset import Dataset + if self.keep_attrs: + attrs = self.obj.attrs + else: + attrs = {} + reduced = {} for key, da in self.obj.data_vars.items(): reduced[key] = da.variable.coarsen( @@ -644,7 +685,7 @@ def wrapped_func(self, **kwargs): ) else: coords[c] = v.variable - return Dataset(reduced, coords=coords) + return Dataset(reduced, coords=coords, attrs=attrs) return wrapped_func diff --git a/xarray/core/variable.py b/xarray/core/variable.py index daa8678157b..62f9fde6a2e 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1949,6 +1949,9 @@ def _coarsen_reshape(self, windows, boundary, side): else: shape.append(variable.shape[i]) + keep_attrs = _get_keep_attrs(default=False) + variable.attrs = variable._attrs if keep_attrs else {} + return variable.data.reshape(shape), tuple(axes) @property diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5e254c37e44..7bcf9379ae8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5664,6 +5664,62 @@ def test_coarsen_coords_cftime(): np.testing.assert_array_equal(actual.time, expected_times) +def test_coarsen_keep_attrs(): + _attrs = {"units": "test", "long_name": "testing"} + + var1 = np.linspace(10, 15, 100) + var2 = np.linspace(5, 10, 100) + coords = 
np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={"var1": ("coord", var1), "var2": ("coord", var2)}, + coords={"coord": coords}, + attrs=_attrs, + ) + + # Test dropped attrs + dat = ds.coarsen(coord=5).mean() + assert dat.attrs == {} + + # Test kept attrs using dataset keyword + dat = ds.coarsen(coord=5, keep_attrs=True).mean() + assert dat.attrs == _attrs + + # Test kept attrs using global option + with set_options(keep_attrs=True): + dat = ds.coarsen(coord=5).mean() + assert dat.attrs == _attrs + + +def test_rolling_keep_attrs(): + _attrs = {"units": "test", "long_name": "testing"} + + var1 = np.linspace(10, 15, 100) + var2 = np.linspace(5, 10, 100) + coords = np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={"var1": ("coord", var1), "var2": ("coord", var2)}, + coords={"coord": coords}, + attrs=_attrs, + ) + + # Test dropped attrs + dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean() + assert dat.attrs == {} + + # Test kept attrs using dataset keyword + dat = ds.rolling( + dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True + ).mean() + assert dat.attrs == _attrs + + # Test kept attrs using global option + with set_options(keep_attrs=True): + dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean() + assert dat.attrs == _attrs + + def test_rolling_properties(ds): # catching invalid args with pytest.raises(ValueError, match="exactly one dim/window should"): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 62fde920b1e..c86ecd0121f 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -9,7 +9,7 @@ import pytz from xarray import Coordinate, Dataset, IndexVariable, Variable, set_options -from xarray.core import dtypes, indexing +from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.common import full_like, ones_like, zeros_like from xarray.core.indexing import ( BasicIndexer, @@ -1879,6 +1879,26 @@ def test_coarsen_2d(self): 
expected = self.cls(("x", "y"), [[10, 18], [42, 35]]) assert_equal(actual, expected) + # TODO: parametrize "operation" over an explicit list of duck_array_ops function names (a module cannot be iterated directly). + def test_coarsen_keep_attrs(self, operation="mean"): + _attrs = {"units": "test", "long_name": "testing"} + + test_func = getattr(duck_array_ops, operation, None) + + # Test dropped attrs + with set_options(keep_attrs=False): + new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen( + windows={"coord": 1}, func=test_func, boundary="exact", side="left" + ) + assert new.attrs == {} + + # Test kept attrs + with set_options(keep_attrs=True): + new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen( + windows={"coord": 1}, func=test_func, boundary="exact", side="left" + ) + assert new.attrs == _attrs + @requires_dask class TestVariableWithDask(VariableSubclassobjects):