From b92949211b30dc751d45613283378b65233a071c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 15 Oct 2020 01:07:13 +0200 Subject: [PATCH 01/16] rolling keep_attrs & default True --- doc/whats-new.rst | 4 ++ xarray/core/common.py | 6 +-- xarray/core/rolling.py | 57 ++++++++++++++++++--------- xarray/tests/test_dataarray.py | 36 +++++++++++++++++ xarray/tests/test_dataset.py | 70 +++++++++++++++++++++++++--------- 5 files changed, 134 insertions(+), 39 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0eb28aa550a..d41c8641e68 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,6 +32,10 @@ New Features By `Miguel Jimenez `_ and `Wei Ji Leong `_. - Unary & binary operations follow the ``keep_attrs`` flag (:issue:`3490`, :issue:`4065`, :issue:`3433`, :issue:`3595`, :pull:`4195`). By `Deepak Cherian `_. +- :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` now also respect the ``keep_attrs`` + keyword for their ``DataArray``s (previously only the global attrs were retained). ``keep_attrs`` + is now set to ``True`` per default for rolling operations (:issue:`4497`). + By `Mathias Hauser `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index eda31a16558..5b5f4549cb0 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -812,8 +812,8 @@ def rolling( center : bool or mapping, default: False Set the labels at the center of the window. keep_attrs : bool, optional - If True, the object's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new + If True (default), the object's attributes (`attrs`) will be copied + from the original object to the new one. If False, the new object will be returned without attributes. **window_kwargs : optional The keyword arguments form of ``dim``. @@ -863,8 +863,6 @@ def rolling( core.rolling.DataArrayRolling core.rolling.DatasetRolling """ - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) dim = either_dict_or_kwargs(dim, window_kwargs, "rolling") return self._rolling_cls( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 0bffc215ab0..dca870ac7ee 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -64,8 +64,8 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None center : bool, default: False Set the labels at the center of the window. keep_attrs : bool, optional - If True, the object's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new + If None (default) or True, the object's attributes (`attrs`) will be + copied from the original object to the new one. If False, the new object will be returned without attributes. Returns @@ -89,7 +89,7 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None self.min_periods = np.prod(self.window) if min_periods is None else min_periods if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) self.keep_attrs = keep_attrs def __repr__(self): @@ -181,8 +181,8 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None center : bool, default: False Set the labels at the center of the window. keep_attrs : bool, optional - If True, the object's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new + If None (default) or True, the object's attributes (`attrs`) will be + copied from the original object to the new one. If False, the new object will be returned without attributes. Returns @@ -196,8 +196,6 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None Dataset.rolling Dataset.groupby """ - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) super().__init__( obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs ) @@ -294,8 +292,14 @@ def construct( window = self.obj.variable.rolling_window( self.dim, self.window, window_dim, self.center, fill_value=fill_value ) + + attrs = self.obj.attrs if self.keep_attrs else {} + result = DataArray( - window, dims=self.obj.dims + tuple(window_dim), coords=self.obj.coords + window, + dims=self.obj.dims + tuple(window_dim), + coords=self.obj.coords, + attrs=attrs, ) return result.isel( **{d: slice(None, None, s) for d, s in zip(self.dim, stride)} @@ -354,14 +358,16 @@ def reduce(self, func, **kwargs): for d in self.dim } windows = self.construct(rolling_dim) - result = windows.reduce(func, dim=list(rolling_dim.values()), **kwargs) + result = windows.reduce( + func, dim=list(rolling_dim.values()), keep_attrs=self.keep_attrs, **kwargs + ) # Find valid windows based on count. counts = self._counts() return result.where(counts >= self.min_periods) def _counts(self): - """ Number of non-nan entries in each rolling window. """ + """Number of non-nan entries in each rolling window.""" rolling_dim = { d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}") @@ -422,7 +428,10 @@ def _bottleneck_reduce(self, func, **kwargs): if self.center[0]: values = values[valid] - result = DataArray(values, self.obj.coords) + + attrs = self.obj.attrs if self.keep_attrs else {} + + result = DataArray(values, self.obj.coords, attrs=attrs) return result @@ -473,8 +482,8 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None center : bool or mapping of hashable to bool, default: False Set the labels at the center of the window. keep_attrs : bool, optional - If True, the object's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new + If None (default) or True, the object's attributes (`attrs`) will be + copied from the original object to the new one. If False, the new object will be returned without attributes. Returns @@ -587,6 +596,15 @@ def construct( from .dataset import Dataset + if keep_attrs is not None: + warnings.warn( + "Passing 'keep_attrs' to 'construct' is deprecated." + " Please pass 'keep_attrs' directly to ``rolling``", + FutureWarning, + ) + else: + keep_attrs = self.keep_attrs + if window_dim is None: if len(window_dim_kwargs) == 0: raise ValueError( @@ -599,12 +617,9 @@ def construct( ) stride = self._mapping_to_list(stride, default=1) - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - dataset = {} for key, da in self.obj.data_vars.items(): - # keeps rollings only for the dataset depending on slf.dim + # keeps rollings only for the dataset depending on self.dim dims = [d for d in self.dim if d in da.dims] if len(dims) > 0: wi = {d: window_dim[i] for i, d in enumerate(self.dim) if d in da.dims} @@ -614,7 +629,13 @@ def construct( ) else: dataset[key] = da - return Dataset(dataset, coords=self.obj.coords).isel( + + if not keep_attrs: + dataset[key].attrs = {} + + attrs = self.obj.attrs if keep_attrs else {} + + return Dataset(dataset, coords=self.obj.coords, attrs=attrs).isel( **{d: slice(None, None, s) for d, s in zip(self.dim, stride)} ) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ba424170349..a5dfd403c45 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6295,6 +6295,7 @@ def test_rolling_properties(da): # catching invalid args with pytest.raises(ValueError, match="window must be > 0"): da.rolling(time=-2) + with pytest.raises(ValueError, match="min_periods must be greater than zero"): da.rolling(time=2, min_periods=0) @@ -6538,6 +6539,41 @@ def test_ndrolling_construct(center, fill_value): assert_allclose(actual, expected) +@pytest.mark.parametrize( + "funcname, argument", + [("reduce", (np.mean,)), ("mean", ()), ("construct", ("window_dim",))], +) +def test_rolling_keep_attrs(funcname, argument): + + attrs_da = {"da_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + ds = DataArray( + data, + dims=("coord"), + coords={"coord": coords}, + attrs=attrs_da, + ) + + # attrs are now kept per default + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + dat = func(*argument) + assert dat.attrs == attrs_da + + # discard attrs + func = getattr(ds.rolling(dim={"coord": 5}, keep_attrs=False), funcname) + dat = func(*argument) + assert dat.attrs == {} + + # test discard attrs using global option + with set_options(keep_attrs=False): + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + dat = func(*argument) + assert dat.attrs == {} + + def test_raise_no_warning_for_nan_in_binary_ops(): with pytest.warns(None) as record: xr.DataArray([1, 2, np.NaN]) > 0 diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f1b51766831..a22e0d1ab45 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5995,33 +5995,69 @@ def test_coarsen_keep_attrs(): xr.testing.assert_identical(ds, ds2) -def test_rolling_keep_attrs(): - _attrs = {"units": "test", "long_name": "testing"} +@pytest.mark.parametrize( + "funcname, argument", + [("reduce", (np.mean,)), ("mean", ()), ("construct", ("window_dim",))], +) +def test_rolling_keep_attrs(funcname, argument): + global_attrs = {"units": "test", "long_name": "testing"} + attrs_da = {"da_attr": "test"} - var1 = np.linspace(10, 15, 100) - var2 = np.linspace(5, 10, 100) + data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) ds = Dataset( - data_vars={"var1": ("coord", var1), "var2": ("coord", var2)}, + data_vars={"da": ("coord", data)}, coords={"coord": coords}, - attrs=_attrs, + attrs=global_attrs, ) + ds.da.attrs = attrs_da - # Test dropped attrs - dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean() + # attrs are now kept per default + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + dat = func(*argument) + assert dat.attrs == global_attrs + assert dat.da.attrs == attrs_da + + # discard attrs + func = getattr(ds.rolling(dim={"coord": 5}, keep_attrs=False), funcname) + dat = func(*argument) assert dat.attrs == {} + assert dat.da.attrs == {} - # Test kept attrs using dataset keyword - dat = ds.rolling( - dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True - ).mean() - assert dat.attrs == _attrs + # test discard attrs using global option + with set_options(keep_attrs=False): + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + dat = func(*argument) + assert dat.attrs == {} + assert dat.da.attrs == {} - # Test kept attrs using global option - with set_options(keep_attrs=True): - dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean() - assert dat.attrs == _attrs + +def test_rolling_keep_attrs_construct_deprecated(): + global_attrs = {"units": "test", "long_name": "testing"} + attrs_da = {"da_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={"da": ("coord", data)}, + coords={"coord": coords}, + attrs=global_attrs, + ) + ds.da.attrs = attrs_da + + # deprecated option + with pytest.warns( + FutureWarning, match="Passing 'keep_attrs' to 'construct' is deprecated" + ): + dat = ds.rolling(dim={"coord": 5}, keep_attrs=True).construct( + "window_dim", keep_attrs=False + ) + + # takes precedence over 'keep_attrs' passed to rolling + assert dat.attrs == {} + assert dat.da.attrs == {} def test_rolling_properties(ds): From 90e202f57b6df22a355b8ba1ac2a5e323c78b531 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 19 Oct 2020 23:01:10 +0200 Subject: [PATCH 02/16] WIP --- xarray/core/rolling.py | 195 ++++++++++++++++++++++++----------------- 1 file changed, 113 insertions(+), 82 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index dca870ac7ee..8faad15a175 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -22,6 +22,10 @@ Parameters ---------- +keep_attrs : bool, default: None + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `{name}`. @@ -43,8 +47,8 @@ class Rolling: DataArray.rolling """ - __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs") - _attributes = ("window", "min_periods", "center", "dim", "keep_attrs") + __slots__ = ("obj", "window_len", "min_periods", "center", "window_dims", "keep_attrs") + _attributes = ("window_len", "min_periods", "center", "window_dims", "keep_attrs") def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ @@ -56,28 +60,23 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None Object to window. windows : mapping of hashable to int A mapping from the name of the dimension to create the rolling - exponential window along (e.g. `time`) to the size of the moving window. + window along (e.g. `time`) to the size of the moving window. min_periods : int, default: None Minimum number of observations in window required to have a value (otherwise result is NA). The default, None, is equivalent to setting min_periods equal to the size of the window. center : bool, default: False Set the labels at the center of the window. - keep_attrs : bool, optional - If None (default) or True, the object's attributes (`attrs`) will be - copied from the original object to the new one. If False, the new - object will be returned without attributes. Returns ------- rolling : type of input argument """ - self.dim, self.window = [], [] - for d, w in windows.items(): - self.dim.append(d) - if w <= 0: + self.window_dims = list(windows.keys()) + self.window_len = list(windows.values()) + + if any(wl <= 0 for wl in window_len): raise ValueError("window must be > 0") - self.window.append(w) self.center = self._mapping_to_list(center, default=False) self.obj = obj @@ -86,10 +85,15 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None if min_periods is not None and min_periods <= 0: raise ValueError("min_periods must be greater than zero or None") - self.min_periods = np.prod(self.window) if min_periods is None else min_periods + self.min_periods = np.prod(self.window_len) if min_periods is None else min_periods - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) + if keep_attrs is not None: + warnings.warn( + "Passing ``keep_attrs`` to ``rolling`` is deprecated and will raise an error" + " in xarray 0.18. Please pass ``keep_attrs`` directly to the applied function." + " Note that keep_attrs is now True per default.", + FutureWarning, + ) self.keep_attrs = keep_attrs def __repr__(self): @@ -97,22 +101,31 @@ def __repr__(self): attrs = [ "{k}->{v}{c}".format(k=k, v=w, c="(center)" if c else "") - for k, w, c in zip(self.dim, self.window, self.center) + for k, w, c in zip(self.window_dims, self.window_len, self.center) ] return "{klass} [{attrs}]".format( klass=self.__class__.__name__, attrs=",".join(attrs) ) def __len__(self): - return self.obj.sizes[self.dim] + return self.obj.sizes[self.window_dims] def _reduce_method(name: str) -> Callable: # type: ignore array_agg_func = getattr(duck_array_ops, name) bottleneck_move_func = getattr(bottleneck, "move_" + name, None) def method(self, **kwargs): + + keep_attrs = kwargs.pop("keep_attrs", None) + if keep_attrs is None: + # TODO: remove the self self.keep_attrs part after the deprecation + if self.keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + else: + keep_attrs = self.keep_attrs + return self._numpy_or_bottleneck_reduce( - array_agg_func, bottleneck_move_func, **kwargs + array_agg_func, bottleneck_move_func, keep_attrs=keep_attrs, **kwargs ) method.__name__ = name @@ -142,19 +155,19 @@ def _mapping_to_list( ): if utils.is_dict_like(arg): if allow_default: - return [arg.get(d, default) for d in self.dim] + return [arg.get(d, default) for d in self.window_dims] else: - for d in self.dim: + for d in self.window_dims: if d not in arg: raise KeyError(f"argument has no key {d}.") - return [arg[d] for d in self.dim] + return [arg[d] for d in self.window_dims] elif allow_allsame: # for single argument - return [arg] * len(self.dim) - elif len(self.dim) == 1: + return [arg] * len(self.window_dims) + elif len(self.window_dims) == 1: return [arg] else: raise ValueError( - "Mapping argument is necessary for {}d-rolling.".format(len(self.dim)) + "Mapping argument is necessary for {}d-rolling.".format(len(self.window_dims)) ) @@ -180,10 +193,6 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None setting min_periods equal to the size of the window. center : bool, default: False Set the labels at the center of the window. - keep_attrs : bool, optional - If None (default) or True, the object's attributes (`attrs`) will be - copied from the original object to the new one. If False, the new - object will be returned without attributes. Returns ------- @@ -201,24 +210,24 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None ) # TODO legacy attribute - self.window_labels = self.obj[self.dim[0]] + self.window_labels = self.obj[self.window_dims[0]] def __iter__(self): - if len(self.dim) > 1: + if len(self.window_dims) > 1: raise ValueError("__iter__ is only supported for 1d-rolling") stops = np.arange(1, len(self.window_labels) + 1) - starts = stops - int(self.window[0]) - starts[: int(self.window[0])] = 0 + starts = stops - int(self.window_len[0]) + starts[: int(self.window_len[0])] = 0 for (label, start, stop) in zip(self.window_labels, starts, stops): - window = self.obj.isel(**{self.dim[0]: slice(start, stop)}) + window = self.obj.isel(**{self.window_dims[0]: slice(start, stop)}) - counts = window.count(dim=self.dim[0]) + counts = window.count(dim=self.window_dims[0]) window = window.where(counts >= self.min_periods) yield (label, window) def construct( - self, window_dim=None, stride=1, fill_value=dtypes.NA, **window_dim_kwargs + self, window_dim=None, stride=1, fill_value=dtypes.NA, keep_attrs=None, **window_dim_kwargs ): """ Convert this rolling object to xr.DataArray, @@ -233,6 +242,10 @@ def construct( Size of stride for the rolling window. fill_value : default: dtypes.NA Filling value to match the dimension size. + keep_attrs : bool, default: None + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. If None uses the global default. **window_dim_kwargs : {dim: new_name, ...}, optional The keyword arguments form of ``window_dim``. @@ -276,13 +289,20 @@ def construct( """ from .dataarray import DataArray + + if keep_attrs is None: + # TODO: remove the self self.keep_attrs part after the deprecation + if self.keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + else: + keep_attrs = self.keep_attrs if window_dim is None: if len(window_dim_kwargs) == 0: raise ValueError( "Either window_dim or window_dim_kwargs need to be specified." ) - window_dim = {d: window_dim_kwargs[d] for d in self.dim} + window_dim = {d: window_dim_kwargs[d] for d in self.window_dims} window_dim = self._mapping_to_list( window_dim, allow_default=False, allow_allsame=False @@ -290,7 +310,7 @@ def construct( stride = self._mapping_to_list(stride, default=1) window = self.obj.variable.rolling_window( - self.dim, self.window, window_dim, self.center, fill_value=fill_value + self.window_dims, self.window_len, window_dim, self.center, fill_value=fill_value ) attrs = self.obj.attrs if self.keep_attrs else {} @@ -302,10 +322,10 @@ def construct( attrs=attrs, ) return result.isel( - **{d: slice(None, None, s) for d, s in zip(self.dim, stride)} + **{d: slice(None, None, s) for d, s in zip(self.window_dims, stride)} ) - def reduce(self, func, **kwargs): + def reduce(self, func, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). @@ -315,6 +335,10 @@ def reduce(self, func, **kwargs): Function which can be called in the form `func(x, **kwargs)` to return the result of collapsing an np.ndarray over an the rolling dimension. + keep_attrs : bool, default: None + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -353,13 +377,21 @@ def reduce(self, func, **kwargs): [ 4., 9., 15., 18.]]) Dimensions without coordinates: a, b """ + + if keep_attrs is None: + # TODO: remove the self self.keep_attrs part after the deprecation + if self.keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + else: + keep_attrs = self.keep_attrs + rolling_dim = { d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}") - for d in self.dim + for d in self.window_dims } windows = self.construct(rolling_dim) result = windows.reduce( - func, dim=list(rolling_dim.values()), keep_attrs=self.keep_attrs, **kwargs + func, dim=list(rolling_dim.values()), keep_attrs=keep_attrs, **kwargs ) # Find valid windows based on count. @@ -371,7 +403,7 @@ def _counts(self): rolling_dim = { d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}") - for d in self.dim + for d in self.window_dims } # We use False as the fill_value instead of np.nan, since boolean # array is faster to be reduced than object array. @@ -380,8 +412,8 @@ def _counts(self): counts = ( self.obj.notnull() .rolling( - center={d: self.center[i] for i, d in enumerate(self.dim)}, - **{d: w for d, w in zip(self.dim, self.window)}, + center={d: self.center[i] for i, d in enumerate(self.window_dims)}, + **{d: w for d, w in zip(self.window_dims, self.window_len)}, ) .construct(rolling_dim, fill_value=False) .sum(dim=list(rolling_dim.values()), skipna=False) @@ -399,31 +431,31 @@ def _bottleneck_reduce(self, func, **kwargs): else: min_count = self.min_periods - axis = self.obj.get_axis_num(self.dim[0]) + axis = self.obj.get_axis_num(self.window_dims[0]) padded = self.obj.variable if self.center[0]: if is_duck_dask_array(padded.data): # Workaround to make the padded chunk size is larger than - # self.window-1 - shift = -(self.window[0] + 1) // 2 - offset = (self.window[0] - 1) // 2 + # self.window_len-1 + shift = -(self.window_len[0] + 1) // 2 + offset = (self.window_len[0] - 1) // 2 valid = (slice(None),) * axis + ( slice(offset, offset + self.obj.shape[axis]), ) else: - shift = (-self.window[0] // 2) + 1 + shift = (-self.window_len[0] // 2) + 1 valid = (slice(None),) * axis + (slice(-shift, None),) - padded = padded.pad({self.dim[0]: (0, -shift)}, mode="constant") + padded = padded.pad({self.window_dims[0]: (0, -shift)}, mode="constant") if is_duck_dask_array(padded.data): raise AssertionError("should not be reachable") values = dask_rolling_wrapper( - func, padded.data, window=self.window[0], min_count=min_count, axis=axis + func, padded.data, window=self.window_len[0], min_count=min_count, axis=axis ) else: values = func( - padded.data, window=self.window[0], min_count=min_count, axis=axis + padded.data, window=self.window_len[0], min_count=min_count, axis=axis ) if self.center[0]: @@ -431,7 +463,7 @@ def _bottleneck_reduce(self, func, **kwargs): attrs = self.obj.attrs if self.keep_attrs else {} - result = DataArray(values, self.obj.coords, attrs=attrs) + result = DataArray(values, self.obj.coords, attrs=attrs, name=self.obj.name) return result @@ -440,7 +472,7 @@ def _numpy_or_bottleneck_reduce( ): if "dim" in kwargs: warnings.warn( - f"Reductions will be applied along the rolling dimension '{self.dim}'. Passing the 'dim' kwarg to reduction operations has no effect and will raise an error in xarray 0.16.0.", + f"Reductions will be applied along the rolling dimension '{self.window_dims}'. Passing the 'dim' kwarg to reduction operations has no effect and will raise an error in xarray 0.16.0.", DeprecationWarning, stacklevel=3, ) @@ -449,7 +481,7 @@ def _numpy_or_bottleneck_reduce( if ( bottleneck_move_func is not None and not is_duck_dask_array(self.obj.data) - and len(self.dim) == 1 + and len(self.window_dims) == 1 ): # TODO: renable bottleneck with dask after the issues # underlying https://github.com/pydata/xarray/issues/2940 are @@ -481,10 +513,6 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None setting min_periods equal to the size of the window. center : bool or mapping of hashable to bool, default: False Set the labels at the center of the window. - keep_attrs : bool, optional - If None (default) or True, the object's attributes (`attrs`) will be - copied from the original object to the new one. If False, the new - object will be returned without attributes. Returns ------- @@ -498,14 +526,15 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None DataArray.groupby """ super().__init__(obj, windows, min_periods, center, keep_attrs) - if any(d not in self.obj.dims for d in self.dim): - raise KeyError(self.dim) + if any(d not in self.obj.dims for d in self.window_dims): + raise KeyError(self.window_dims) + # Keep each Rolling object as a dictionary self.rollings = {} for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on slf.dim dims, center = [], {} - for i, d in enumerate(self.dim): + for i, d in enumerate(self.window_dims): if d in da.dims: dims.append(d) center[d] = self.center[i] @@ -513,22 +542,22 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None if len(dims) > 0: w = {d: windows[d] for d in dims} self.rollings[key] = DataArrayRolling( - da, w, min_periods, center, keep_attrs + da, w, min_periods, center, ) - def _dataset_implementation(self, func, **kwargs): + def _dataset_implementation(self, func, keep_attrs, **kwargs): from .dataset import Dataset reduced = {} for key, da in self.obj.data_vars.items(): - if any(d in da.dims for d in self.dim): + if any(d in da.dims for d in self.window_dims): reduced[key] = func(self.rollings[key], **kwargs) else: reduced[key] = self.obj[key] - attrs = self.obj.attrs if self.keep_attrs else {} - return Dataset(reduced, coords=self.obj.coords, attrs=attrs) + attrs = self.obj.attrs if keep_attrs else {} + return Dataset(reduced, coords=self.obj.coords, attrs=attrs, name=self.obj.name) - def reduce(self, func, **kwargs): + def reduce(self, func, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). @@ -538,6 +567,10 @@ def reduce(self, func, **kwargs): Function which can be called in the form `func(x, **kwargs)` to return the result of collapsing an np.ndarray over an the rolling dimension. + keep_attrs : bool, default: None + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -596,21 +629,19 @@ def construct( from .dataset import Dataset - if keep_attrs is not None: - warnings.warn( - "Passing 'keep_attrs' to 'construct' is deprecated." - " Please pass 'keep_attrs' directly to ``rolling``", - FutureWarning, - ) - else: - keep_attrs = self.keep_attrs + if keep_attrs is None: + # TODO: remove the self self.keep_attrs part after the deprecation + if self.keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + else: + keep_attrs = self.keep_attrs if window_dim is None: if len(window_dim_kwargs) == 0: raise ValueError( "Either window_dim or window_dim_kwargs need to be specified." ) - window_dim = {d: window_dim_kwargs[d] for d in self.dim} + window_dim = {d: window_dim_kwargs[d] for d in self.window_dims} window_dim = self._mapping_to_list( window_dim, allow_default=False, allow_allsame=False @@ -619,11 +650,11 @@ def construct( dataset = {} for key, da in self.obj.data_vars.items(): - # keeps rollings only for the dataset depending on self.dim - dims = [d for d in self.dim if d in da.dims] + # keeps rollings only for the dataset depending on self.window_dims + dims = [d for d in self.window_dims if d in da.dims] if len(dims) > 0: - wi = {d: window_dim[i] for i, d in enumerate(self.dim) if d in da.dims} - st = {d: stride[i] for i, d in enumerate(self.dim) if d in da.dims} + wi = {d: window_dim[i] for i, d in enumerate(self.window_dims) if d in da.dims} + st = {d: stride[i] for i, d in enumerate(self.window_dims) if d in da.dims} dataset[key] = self.rollings[key].construct( window_dim=wi, fill_value=fill_value, stride=st ) @@ -636,7 +667,7 @@ def construct( attrs = self.obj.attrs if keep_attrs else {} return Dataset(dataset, coords=self.obj.coords, attrs=attrs).isel( - **{d: slice(None, None, s) for d, s in zip(self.dim, stride)} + **{d: slice(None, None, s) for d, s in zip(self.window_dims, stride)} ) From e4788439b5180c321bc5bcefa70fba564afe9084 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 24 Oct 2020 22:24:47 +0200 Subject: [PATCH 03/16] remove docstr on keep_attrs --- xarray/core/common.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 5b5f4549cb0..06911d4a221 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -577,13 +577,10 @@ def pipe( >>> def adder(data, arg): ... return data + arg - ... >>> def div(data, arg): ... return data / arg - ... >>> def sub_mult(data, sub_arg, mult_arg): ... return (data * mult_arg) - sub_arg - ... >>> x.pipe(adder, 2) Dimensions: (lat: 2, lon: 2) @@ -811,10 +808,6 @@ def rolling( setting min_periods equal to the size of the window. center : bool or mapping, default: False Set the labels at the center of the window. - keep_attrs : bool, optional - If True (default), the object's attributes (`attrs`) will be copied - from the original object to the new one. If False, the new - object will be returned without attributes. **window_kwargs : optional The keyword arguments form of ``dim``. One of dim or window_kwargs must be provided. From df2234707426b017d334c7c87a0405a3c4334a3c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 24 Oct 2020 22:26:59 +0200 Subject: [PATCH 04/16] adapt tests --- xarray/tests/test_dataarray.py | 14 +++++------ xarray/tests/test_dataset.py | 44 +++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index abfe8c8170f..ea25c9b9b21 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6558,19 +6558,19 @@ def test_rolling_keep_attrs(funcname, argument): # attrs are now kept per default func = getattr(ds.rolling(dim={"coord": 5}), funcname) - dat = func(*argument) - assert dat.attrs == attrs_da + result = func(*argument) + assert result.attrs == attrs_da # discard attrs - func = getattr(ds.rolling(dim={"coord": 5}, keep_attrs=False), funcname) - dat = func(*argument) - assert dat.attrs == {} + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + result = func(*argument, keep_attrs=False) + assert result.attrs == {} # test discard attrs using global option with set_options(keep_attrs=False): func = getattr(ds.rolling(dim={"coord": 5}), funcname) - dat = func(*argument) - assert dat.attrs == {} + result = func(*argument) + assert result.attrs == {} def test_raise_no_warning_for_nan_in_binary_ops(): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a22e0d1ab45..0fe6c922f33 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6015,22 +6015,23 @@ def test_rolling_keep_attrs(funcname, argument): # attrs are now kept per default func = getattr(ds.rolling(dim={"coord": 5}), funcname) - dat = func(*argument) - assert dat.attrs == global_attrs - assert dat.da.attrs == attrs_da + result = func(*argument) + assert result.attrs == global_attrs + assert result.da.attrs == attrs_da # discard attrs - func = getattr(ds.rolling(dim={"coord": 5}, keep_attrs=False), funcname) - dat = func(*argument) - assert dat.attrs == {} - assert dat.da.attrs == {} + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + assert result.da.attrs == {} # test discard attrs using global option + func = getattr(ds.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): - func = getattr(ds.rolling(dim={"coord": 5}), funcname) - dat = func(*argument) - assert dat.attrs == {} - assert dat.da.attrs == {} + result = func(*argument) + + assert result.attrs == {} + assert result.da.attrs == {} def test_rolling_keep_attrs_construct_deprecated(): @@ -6049,15 +6050,26 @@ def test_rolling_keep_attrs_construct_deprecated(): # deprecated option with pytest.warns( - FutureWarning, match="Passing 'keep_attrs' to 'construct' is deprecated" + FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" + ): + result = ds.rolling(dim={"coord": 5}, keep_attrs=False).construct("window_dim") + + assert result.attrs == {} + assert result.da.attrs == {} + + # the keep_attrs in the reduction function takes precedence + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" ): - dat = ds.rolling(dim={"coord": 5}, keep_attrs=True).construct( + result = ds.rolling(dim={"coord": 5}, keep_attrs=True).construct( "window_dim", keep_attrs=False ) - # takes precedence over 'keep_attrs' passed to rolling - assert dat.attrs == {} - assert dat.da.attrs == {} + assert result.attrs == {} + assert result.da.attrs == {} + + +keep_attrs = False def test_rolling_properties(ds): From cb30ce1976a6d58bbbe899b667dc5f3ad52d6eff Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 24 Oct 2020 22:27:23 +0200 Subject: [PATCH 05/16] rolling WIP --- xarray/core/rolling.py | 201 ++++++++++++++++++++++++++--------------- 1 file changed, 128 insertions(+), 73 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 8faad15a175..f1542441174 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -47,7 +47,14 @@ class Rolling: DataArray.rolling """ - __slots__ = ("obj", "window_len", "min_periods", "center", "window_dims", "keep_attrs") + __slots__ = ( + "obj", + "window_len", + "min_periods", + "center", + "window_dims", + "keep_attrs", + ) _attributes = ("window_len", "min_periods", "center", "window_dims", "keep_attrs") def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): @@ -74,9 +81,9 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None """ self.window_dims = list(windows.keys()) self.window_len = list(windows.values()) - - if any(wl <= 0 for wl in window_len): - raise ValueError("window must be > 0") + + if any(wl <= 0 for wl in self.window_len): + raise ValueError("window must be > 0") self.center = self._mapping_to_list(center, default=False) self.obj = obj @@ -85,7 +92,9 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None if min_periods is not None and min_periods <= 0: raise ValueError("min_periods must be greater than zero or None") - self.min_periods = np.prod(self.window_len) if min_periods is None else min_periods + self.min_periods = ( + np.prod(self.window_len) if min_periods is None else min_periods + ) if keep_attrs is not None: warnings.warn( @@ -115,15 +124,10 @@ def _reduce_method(name: str) -> Callable: # type: ignore bottleneck_move_func = getattr(bottleneck, "move_" + name, None) def method(self, **kwargs): - + keep_attrs = kwargs.pop("keep_attrs", None) - if keep_attrs is None: - # TODO: remove the self self.keep_attrs part after the deprecation - if self.keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - else: - keep_attrs = self.keep_attrs - + keep_attrs = self._get_keep_attrs(keep_attrs) + return self._numpy_or_bottleneck_reduce( array_agg_func, bottleneck_move_func, keep_attrs=keep_attrs, **kwargs ) @@ -143,8 +147,8 @@ def method(self, **kwargs): var = _reduce_method("var") median = _reduce_method("median") - def count(self): - rolling_count = self._counts() + def count(self, keep_attrs=None): + rolling_count = self._counts(keep_attrs=keep_attrs) enough_periods = rolling_count >= self.min_periods return rolling_count.where(enough_periods) @@ -167,9 +171,36 @@ def _mapping_to_list( return [arg] else: raise ValueError( - "Mapping argument is necessary for {}d-rolling.".format(len(self.window_dims)) + "Mapping argument is necessary for {}d-rolling.".format( + len(self.window_dims) + ) ) + def _get_keep_attrs(self, keep_attrs): + + if keep_attrs is None: + # TODO: uncomment the next line and remove the others after the deprecation + # keep_attrs = _get_keep_attrs(default=True) + + if self.keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + else: + keep_attrs = self.keep_attrs + + return keep_attrs + + @property + def dim(self): + warnings.warn("'dims' has been renamed to 'window_dims'", FutureWarning) + + return self.window_dims + + @property + def window(self): + warnings.warn("'window' has been renamed to 'window_len'", FutureWarning) + + return self.window_len + class DataArrayRolling(Rolling): __slots__ = ("window_labels",) @@ -227,7 +258,12 @@ def __iter__(self): yield (label, window) def construct( - self, window_dim=None, stride=1, fill_value=dtypes.NA, keep_attrs=None, **window_dim_kwargs + self, + window_dim=None, + stride=1, + fill_value=dtypes.NA, + keep_attrs=None, + **window_dim_kwargs, ): """ Convert this rolling object to xr.DataArray, @@ -237,8 +273,7 @@ def construct( ---------- window_dim : str or mapping, optional A mapping from dimension name to the new window dimension names. - Just a string can be used for 1d-rolling. - stride : int or mapping of int, optional + stride : int or mapping of int, default: 1 Size of stride for the rolling window. fill_value : default: dtypes.NA Filling value to match the dimension size. @@ -289,13 +324,8 @@ def construct( """ from .dataarray import DataArray - - if keep_attrs is None: - # TODO: remove the self self.keep_attrs part after the deprecation - if self.keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - else: - keep_attrs = self.keep_attrs + + keep_attrs = self._get_keep_attrs(keep_attrs) if window_dim is None: if len(window_dim_kwargs) == 0: @@ -310,16 +340,21 @@ def construct( stride = self._mapping_to_list(stride, default=1) window = self.obj.variable.rolling_window( - self.window_dims, self.window_len, window_dim, self.center, fill_value=fill_value + self.window_dims, + self.window_len, + window_dim, + self.center, + fill_value=fill_value, ) - attrs = self.obj.attrs if self.keep_attrs else {} + attrs = self.obj.attrs if keep_attrs else {} result = DataArray( window, dims=self.obj.dims + tuple(window_dim), coords=self.obj.coords, attrs=attrs, + name=self.obj.name, ) return result.isel( **{d: slice(None, None, s) for d, s in zip(self.window_dims, stride)} @@ -377,14 +412,9 @@ def reduce(self, func, keep_attrs=None, **kwargs): [ 4., 9., 15., 18.]]) Dimensions without coordinates: a, b """ - - if keep_attrs is None: - # TODO: remove the self self.keep_attrs part after the deprecation - if self.keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - else: - keep_attrs = self.keep_attrs - + + keep_attrs = self._get_keep_attrs(keep_attrs) + rolling_dim = { d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}") for d in self.window_dims @@ -395,10 +425,10 @@ def reduce(self, func, keep_attrs=None, **kwargs): ) # Find valid windows based on count. - counts = self._counts() + counts = self._counts(keep_attrs=False) return result.where(counts >= self.min_periods) - def _counts(self): + def _counts(self, keep_attrs): """Number of non-nan entries in each rolling window.""" rolling_dim = { @@ -409,18 +439,19 @@ def _counts(self): # array is faster to be reduced than object array. # The use of skipna==False is also faster since it does not need to # copy the strided array. + center = {d: self.center[i] for i, d in enumerate(self.window_dims)} + windows = {d: w for d, w in zip(self.window_dims, self.window_len)} + counts = ( self.obj.notnull() - .rolling( - center={d: self.center[i] for i, d in enumerate(self.window_dims)}, - **{d: w for d, w in zip(self.window_dims, self.window_len)}, - ) + .rolling(center=center, **windows) .construct(rolling_dim, fill_value=False) - .sum(dim=list(rolling_dim.values()), skipna=False) + .sum(dim=list(rolling_dim.values()), skipna=False, keep_attrs=keep_attrs) ) + return counts - def _bottleneck_reduce(self, func, **kwargs): + def _bottleneck_reduce(self, func, keep_attrs, **kwargs): from .dataarray import DataArray # bottleneck doesn't allow min_count to be 0, although it should @@ -436,8 +467,8 @@ def _bottleneck_reduce(self, func, **kwargs): padded = self.obj.variable if self.center[0]: if is_duck_dask_array(padded.data): - # Workaround to make the padded chunk size is larger than - # self.window_len-1 + # workaround to make the padded chunk size larger than + # self.window_len - 1 shift = -(self.window_len[0] + 1) // 2 offset = (self.window_len[0] - 1) // 2 valid = (slice(None),) * axis + ( @@ -449,9 +480,13 @@ def _bottleneck_reduce(self, func, **kwargs): padded = padded.pad({self.window_dims[0]: (0, -shift)}, mode="constant") if is_duck_dask_array(padded.data): - raise AssertionError("should not be reachable") + # raise AssertionError("should not be reachable") values = dask_rolling_wrapper( - func, padded.data, window=self.window_len[0], min_count=min_count, axis=axis + func, + padded.data, + window=self.window_len[0], + min_count=min_count, + axis=axis, ) else: values = func( @@ -461,18 +496,18 @@ def _bottleneck_reduce(self, func, **kwargs): if self.center[0]: values = values[valid] - attrs = self.obj.attrs if self.keep_attrs else {} - - result = DataArray(values, self.obj.coords, attrs=attrs, name=self.obj.name) + attrs = self.obj.attrs if keep_attrs else {} - return result + return DataArray(values, self.obj.coords, attrs=attrs, name=self.obj.name) def _numpy_or_bottleneck_reduce( - self, array_agg_func, bottleneck_move_func, **kwargs + self, array_agg_func, bottleneck_move_func, keep_attrs, **kwargs ): if "dim" in kwargs: warnings.warn( - f"Reductions will be applied along the rolling dimension '{self.window_dims}'. Passing the 'dim' kwarg to reduction operations has no effect and will raise an error in xarray 0.16.0.", + f"Reductions will be applied along the rolling dimension " + f"'{self.window_dims}'. Passing the 'dim' kwarg to reduction " + f"operations has no effect.", DeprecationWarning, stacklevel=3, ) @@ -486,9 +521,11 @@ def _numpy_or_bottleneck_reduce( # TODO: renable bottleneck with dask after the issues # underlying https://github.com/pydata/xarray/issues/2940 are # fixed. - return self._bottleneck_reduce(bottleneck_move_func, **kwargs) + return self._bottleneck_reduce( + bottleneck_move_func, keep_attrs=keep_attrs, **kwargs + ) else: - return self.reduce(array_agg_func, **kwargs) + return self.reduce(array_agg_func, keep_attrs=keep_attrs, **kwargs) class DatasetRolling(Rolling): @@ -526,13 +563,14 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None DataArray.groupby """ super().__init__(obj, windows, min_periods, center, keep_attrs) + if any(d not in self.obj.dims for d in self.window_dims): raise KeyError(self.window_dims) - # Keep each Rolling object as a dictionary + # Keep a rolling object for each DataArray in a dictionary self.rollings = {} for key, da in self.obj.data_vars.items(): - # keeps rollings only for the dataset depending on slf.dim + # keeps rollings only for the dataset depending on self._window_dims dims, center = [], {} for i, d in enumerate(self.window_dims): if d in da.dims: @@ -542,20 +580,29 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None if len(dims) > 0: w = {d: windows[d] for d in dims} self.rollings[key] = DataArrayRolling( - da, w, min_periods, center, + da, + w, + min_periods, + center, ) def _dataset_implementation(self, func, keep_attrs, **kwargs): from .dataset import Dataset + keep_attrs = self._get_keep_attrs(keep_attrs) + reduced = {} for key, da in self.obj.data_vars.items(): if any(d in da.dims for d in self.window_dims): - reduced[key] = func(self.rollings[key], **kwargs) + reduced[key] = func(self.rollings[key], keep_attrs=keep_attrs, **kwargs) else: reduced[key] = self.obj[key] + # we need to delete the attrs of the copied DataArray + if not keep_attrs: + reduced[key].attrs = {} + attrs = self.obj.attrs if keep_attrs else {} - return Dataset(reduced, coords=self.obj.coords, attrs=attrs, name=self.obj.name) + return Dataset(reduced, coords=self.obj.coords, attrs=attrs) def reduce(self, func, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some @@ -580,14 +627,18 @@ def reduce(self, func, keep_attrs=None, **kwargs): Array with summarized data. """ return self._dataset_implementation( - functools.partial(DataArrayRolling.reduce, func=func), **kwargs + functools.partial(DataArrayRolling.reduce, func=func), + keep_attrs=keep_attrs, + **kwargs, ) - def _counts(self): - return self._dataset_implementation(DataArrayRolling._counts) + def _counts(self, keep_attrs): + return self._dataset_implementation( + DataArrayRolling._counts, keep_attrs=keep_attrs + ) def _numpy_or_bottleneck_reduce( - self, array_agg_func, bottleneck_move_func, **kwargs + self, array_agg_func, bottleneck_move_func, keep_attrs, **kwargs ): return self._dataset_implementation( functools.partial( @@ -595,6 +646,7 @@ def _numpy_or_bottleneck_reduce( array_agg_func=array_agg_func, bottleneck_move_func=bottleneck_move_func, ), + keep_attrs=keep_attrs, **kwargs, ) @@ -629,12 +681,7 @@ def construct( from .dataset import Dataset - if keep_attrs is None: - # TODO: remove the self self.keep_attrs part after the deprecation - if self.keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - else: - keep_attrs = self.keep_attrs + keep_attrs = self._get_keep_attrs(keep_attrs) if window_dim is None: if len(window_dim_kwargs) == 0: @@ -653,14 +700,22 @@ def construct( # keeps rollings only for the dataset depending on self.window_dims dims = [d for d in self.window_dims if d in da.dims] if len(dims) > 0: - wi = {d: window_dim[i] for i, d in enumerate(self.window_dims) if d in da.dims} - st = {d: stride[i] for i, d in enumerate(self.window_dims) if d in da.dims} + wi = { + d: window_dim[i] + for i, d in enumerate(self.window_dims) + if d in da.dims + } + st = { + d: stride[i] for i, d in enumerate(self.window_dims) if d in da.dims + } + dataset[key] = self.rollings[key].construct( window_dim=wi, fill_value=fill_value, stride=st ) else: dataset[key] = da + # as the DataArrays can be copied we need to delete the attrs if not keep_attrs: dataset[key].attrs = {} From 8fcb2d9a90fc543fa880a8f69d60c7e5149688bc Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 24 Oct 2020 22:33:30 +0200 Subject: [PATCH 06/16] small update --- xarray/core/rolling.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index f1542441174..d11e0ba6754 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -123,9 +123,8 @@ def _reduce_method(name: str) -> Callable: # type: ignore array_agg_func = getattr(duck_array_ops, name) bottleneck_move_func = getattr(bottleneck, "move_" + name, None) - def method(self, **kwargs): + def method(self, keep_attrs=None, **kwargs): - keep_attrs = kwargs.pop("keep_attrs", None) keep_attrs = self._get_keep_attrs(keep_attrs) return self._numpy_or_bottleneck_reduce( From dbc56d58bd9d5a11a87e534b9873d16dcad4a5b3 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sat, 24 Oct 2020 23:35:35 +0200 Subject: [PATCH 07/16] update docs --- doc/whats-new.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8bf9377b4ca..4d5f3c284f3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,11 @@ v0.16.2 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ +- :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` no longer support passing ``keep_attrs`` + via its constructor. Pass ``keep_attrs`` via the applied function, i.e. use + ``ds.rolling(...).mean(keep_attrs=False)`` instead of ``ds.rolling(..., keep_attrs=False).mean()`` + Rolling operations now keep their attributes per default (:pull:`4510`). + By `Mathias Hauser `_. New Features ~~~~~~~~~~~~ @@ -32,10 +37,6 @@ New Features By `Miguel Jimenez `_ and `Wei Ji Leong `_. - Unary & binary operations follow the ``keep_attrs`` flag (:issue:`3490`, :issue:`4065`, :issue:`3433`, :issue:`3595`, :pull:`4195`). By `Deepak Cherian `_. -- :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` now also respect the ``keep_attrs`` - keyword for their ``DataArray``s (previously only the global attrs were retained). ``keep_attrs`` - is now set to ``True`` per default for rolling operations (:issue:`4497`). - By `Mathias Hauser `_. Bug fixes ~~~~~~~~~ @@ -54,6 +55,9 @@ Bug fixes By `Mathias Hauser `_. - :py:func:`combine_by_coords` now raises an informative error when passing coordinates with differing calendars (:issue:`4495`). By `Mathias Hauser `_. +- :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` now also keep the attributes of of (wrapped) + ``DataArray`` objects, previously only the global attributes were retained (:issue:`4497`, :pull:`4510`). + By `Mathias Hauser `_. Documentation ~~~~~~~~~~~~~ From dddc4baaa501d8e65100ca73933322a97dbcdc07 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 26 Oct 2020 09:08:29 +0100 Subject: [PATCH 08/16] update tests --- xarray/tests/test_dataarray.py | 54 +++++++++++++++++++++++++++++----- xarray/tests/test_dataset.py | 25 ++++++++++------ 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ea25c9b9b21..4601c2ea2ba 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6315,7 +6315,7 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): ) assert_array_equal(actual.values, expected) - with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + with pytest.warns(DeprecationWarning, match="Reductions are applied"): getattr(rolling_obj, name)(dim="time") # Test center @@ -6334,7 +6334,7 @@ def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window): rolling_obj = da_dask.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() if name != "count": - with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + with pytest.warns(DeprecationWarning, match="Reductions are applied"): getattr(rolling_obj, name)(dim="time") # numpy version rolling_obj = da_dask.load().rolling( @@ -6540,7 +6540,12 @@ def test_ndrolling_construct(center, fill_value): @pytest.mark.parametrize( "funcname, argument", - [("reduce", (np.mean,)), ("mean", ()), ("construct", ("window_dim",))], + [ + ("reduce", (np.mean,)), + ("mean", ()), + ("construct", ("window_dim",)), + ("count", ()), + ], ) def test_rolling_keep_attrs(funcname, argument): @@ -6549,7 +6554,7 @@ def test_rolling_keep_attrs(funcname, argument): data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) - ds = DataArray( + da = DataArray( data, dims=("coord"), coords={"coord": coords}, @@ -6557,19 +6562,52 @@ def test_rolling_keep_attrs(funcname, argument): ) # attrs are now kept per default - func = getattr(ds.rolling(dim={"coord": 5}), funcname) + func = getattr(da.rolling(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == attrs_da # discard attrs - func = getattr(ds.rolling(dim={"coord": 5}), funcname) + func = getattr(da.rolling(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} # test discard attrs using global option + func = getattr(da.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): - func = getattr(ds.rolling(dim={"coord": 5}), funcname) - result = func(*argument) + result = func(*argument) + assert result.attrs == {} + + +def test_rolling_keep_attrs_deprecated(): + + attrs_da = {"da_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + da = DataArray( + data, + dims=("coord"), + coords={"coord": coords}, + attrs=attrs_da, + ) + + # deprecated option + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" + ): + result = da.rolling(dim={"coord": 5}, keep_attrs=False).construct("window_dim") + + assert result.attrs == {} + + # the keep_attrs in the reduction function takes precedence + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``rolling`` is deprecated" + ): + result = da.rolling(dim={"coord": 5}, keep_attrs=True).construct( + "window_dim", keep_attrs=False + ) + assert result.attrs == {} diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 0fe6c922f33..2e6e2f69554 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5997,33 +5997,42 @@ def test_coarsen_keep_attrs(): @pytest.mark.parametrize( "funcname, argument", - [("reduce", (np.mean,)), ("mean", ()), ("construct", ("window_dim",))], + [ + ("reduce", (np.mean,)), + ("mean", ()), + ("construct", ("window_dim",)), + ("count", ()), + ], ) def test_rolling_keep_attrs(funcname, argument): global_attrs = {"units": "test", "long_name": "testing"} - attrs_da = {"da_attr": "test"} + da_attrs = {"da_attr": "test"} + da_not_rolled_attrs = {"da_not_rolled_attr": "test"} data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) ds = Dataset( - data_vars={"da": ("coord", data)}, + data_vars={"da": ("coord", data), "da_not_rolled": ("no_coord", data)}, coords={"coord": coords}, attrs=global_attrs, ) - ds.da.attrs = attrs_da + ds.da.attrs = da_attrs + ds.da_not_rolled.attrs = da_not_rolled_attrs # attrs are now kept per default func = getattr(ds.rolling(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == global_attrs - assert result.da.attrs == attrs_da + assert result.da.attrs == da_attrs + assert ds.da_not_rolled.attrs == da_not_rolled_attrs # discard attrs func = getattr(ds.rolling(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.da.attrs == {} + assert ds.da_not_rolled.attrs == {} # test discard attrs using global option func = getattr(ds.rolling(dim={"coord": 5}), funcname) @@ -6032,9 +6041,10 @@ def test_rolling_keep_attrs(funcname, argument): assert result.attrs == {} assert result.da.attrs == {} + assert ds.da_not_rolled.attrs == {} -def test_rolling_keep_attrs_construct_deprecated(): +def test_rolling_keep_attrs_deprecated(): global_attrs = {"units": "test", "long_name": "testing"} attrs_da = {"da_attr": "test"} @@ -6069,9 +6079,6 @@ def test_rolling_keep_attrs_construct_deprecated(): assert result.da.attrs == {} -keep_attrs = False - - def test_rolling_properties(ds): # catching invalid args with pytest.raises(ValueError, match="window must be > 0"): From 52698b073b7c8b5fff5fa43ff68e3583cf026b7e Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 26 Oct 2020 09:10:31 +0100 Subject: [PATCH 09/16] undo refactoring --- xarray/core/rolling.py | 157 +++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 100 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index d11e0ba6754..85e7b7b62bf 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -47,15 +47,8 @@ class Rolling: DataArray.rolling """ - __slots__ = ( - "obj", - "window_len", - "min_periods", - "center", - "window_dims", - "keep_attrs", - ) - _attributes = ("window_len", "min_periods", "center", "window_dims", "keep_attrs") + __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs") + _attributes = ("window", "min_periods", "center", "dim", "keep_attrs") def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ @@ -79,11 +72,12 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None ------- rolling : type of input argument """ - self.window_dims = list(windows.keys()) - self.window_len = list(windows.values()) - - if any(wl <= 0 for wl in self.window_len): - raise ValueError("window must be > 0") + self.dim, self.window = [], [] + for d, w in windows.items(): + self.dim.append(d) + if w <= 0: + raise ValueError("window must be > 0") + self.window.append(w) self.center = self._mapping_to_list(center, default=False) self.obj = obj @@ -92,9 +86,7 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None if min_periods is not None and min_periods <= 0: raise ValueError("min_periods must be greater than zero or None") - self.min_periods = ( - np.prod(self.window_len) if min_periods is None else min_periods - ) + self.min_periods = np.prod(self.window) if min_periods is None else min_periods if keep_attrs is not None: warnings.warn( @@ -110,14 +102,14 @@ def __repr__(self): attrs = [ "{k}->{v}{c}".format(k=k, v=w, c="(center)" if c else "") - for k, w, c in zip(self.window_dims, self.window_len, self.center) + for k, w, c in zip(self.dim, self.window, self.center) ] return "{klass} [{attrs}]".format( klass=self.__class__.__name__, attrs=",".join(attrs) ) def __len__(self): - return self.obj.sizes[self.window_dims] + return self.obj.sizes[self.dim] def _reduce_method(name: str) -> Callable: # type: ignore array_agg_func = getattr(duck_array_ops, name) @@ -158,21 +150,19 @@ def _mapping_to_list( ): if utils.is_dict_like(arg): if allow_default: - return [arg.get(d, default) for d in self.window_dims] + return [arg.get(d, default) for d in self.dim] else: - for d in self.window_dims: + for d in self.dim: if d not in arg: raise KeyError(f"argument has no key {d}.") - return [arg[d] for d in self.window_dims] + return [arg[d] for d in self.dim] elif allow_allsame: # for single argument - return [arg] * len(self.window_dims) - elif len(self.window_dims) == 1: + return [arg] * len(self.dim) + elif len(self.dim) == 1: return [arg] else: raise ValueError( - "Mapping argument is necessary for {}d-rolling.".format( - len(self.window_dims) - ) + "Mapping argument is necessary for {}d-rolling.".format(len(self.dim)) ) def _get_keep_attrs(self, keep_attrs): @@ -188,18 +178,6 @@ def _get_keep_attrs(self, keep_attrs): return keep_attrs - @property - def dim(self): - warnings.warn("'dims' has been renamed to 'window_dims'", FutureWarning) - - return self.window_dims - - @property - def window(self): - warnings.warn("'window' has been renamed to 'window_len'", FutureWarning) - - return self.window_len - class DataArrayRolling(Rolling): __slots__ = ("window_labels",) @@ -240,18 +218,18 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None ) # TODO legacy attribute - self.window_labels = self.obj[self.window_dims[0]] + self.window_labels = self.obj[self.dim[0]] def __iter__(self): - if len(self.window_dims) > 1: + if len(self.dim) > 1: raise ValueError("__iter__ is only supported for 1d-rolling") stops = np.arange(1, len(self.window_labels) + 1) - starts = stops - int(self.window_len[0]) - starts[: int(self.window_len[0])] = 0 + starts = stops - int(self.window[0]) + starts[: int(self.window[0])] = 0 for (label, start, stop) in zip(self.window_labels, starts, stops): - window = self.obj.isel(**{self.window_dims[0]: slice(start, stop)}) + window = self.obj.isel(**{self.dim[0]: slice(start, stop)}) - counts = window.count(dim=self.window_dims[0]) + counts = window.count(dim=self.dim[0]) window = window.where(counts >= self.min_periods) yield (label, window) @@ -331,7 +309,7 @@ def construct( raise ValueError( "Either window_dim or window_dim_kwargs need to be specified." ) - window_dim = {d: window_dim_kwargs[d] for d in self.window_dims} + window_dim = {d: window_dim_kwargs[d] for d in self.dim} window_dim = self._mapping_to_list( window_dim, allow_default=False, allow_allsame=False @@ -339,11 +317,7 @@ def construct( stride = self._mapping_to_list(stride, default=1) window = self.obj.variable.rolling_window( - self.window_dims, - self.window_len, - window_dim, - self.center, - fill_value=fill_value, + self.dim, self.window, window_dim, self.center, fill_value=fill_value ) attrs = self.obj.attrs if keep_attrs else {} @@ -356,7 +330,7 @@ def construct( name=self.obj.name, ) return result.isel( - **{d: slice(None, None, s) for d, s in zip(self.window_dims, stride)} + **{d: slice(None, None, s) for d, s in zip(self.dim, stride)} ) def reduce(self, func, keep_attrs=None, **kwargs): @@ -416,7 +390,7 @@ def reduce(self, func, keep_attrs=None, **kwargs): rolling_dim = { d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}") - for d in self.window_dims + for d in self.dim } windows = self.construct(rolling_dim) result = windows.reduce( @@ -432,22 +406,21 @@ def _counts(self, keep_attrs): rolling_dim = { d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}") - for d in self.window_dims + for d in self.dim } # We use False as the fill_value instead of np.nan, since boolean # array is faster to be reduced than object array. # The use of skipna==False is also faster since it does not need to # copy the strided array. - center = {d: self.center[i] for i, d in enumerate(self.window_dims)} - windows = {d: w for d, w in zip(self.window_dims, self.window_len)} - counts = ( self.obj.notnull() - .rolling(center=center, **windows) - .construct(rolling_dim, fill_value=False) + .rolling( + center={d: self.center[i] for i, d in enumerate(self.dim)}, + **{d: w for d, w in zip(self.dim, self.window)}, + ) + .construct(rolling_dim, fill_value=False, keep_attrs=keep_attrs) .sum(dim=list(rolling_dim.values()), skipna=False, keep_attrs=keep_attrs) ) - return counts def _bottleneck_reduce(self, func, keep_attrs, **kwargs): @@ -461,35 +434,31 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): else: min_count = self.min_periods - axis = self.obj.get_axis_num(self.window_dims[0]) + axis = self.obj.get_axis_num(self.dim[0]) padded = self.obj.variable if self.center[0]: if is_duck_dask_array(padded.data): # workaround to make the padded chunk size larger than - # self.window_len - 1 - shift = -(self.window_len[0] + 1) // 2 - offset = (self.window_len[0] - 1) // 2 + # self.window - 1 + shift = -(self.window[0] + 1) // 2 + offset = (self.window[0] - 1) // 2 valid = (slice(None),) * axis + ( slice(offset, offset + self.obj.shape[axis]), ) else: - shift = (-self.window_len[0] // 2) + 1 + shift = (-self.window[0] // 2) + 1 valid = (slice(None),) * axis + (slice(-shift, None),) - padded = padded.pad({self.window_dims[0]: (0, -shift)}, mode="constant") + padded = padded.pad({self.dim[0]: (0, -shift)}, mode="constant") if is_duck_dask_array(padded.data): - # raise AssertionError("should not be reachable") + raise AssertionError("should not be reachable") values = dask_rolling_wrapper( - func, - padded.data, - window=self.window_len[0], - min_count=min_count, - axis=axis, + func, padded.data, window=self.window[0], min_count=min_count, axis=axis ) else: values = func( - padded.data, window=self.window_len[0], min_count=min_count, axis=axis + padded.data, window=self.window[0], min_count=min_count, axis=axis ) if self.center[0]: @@ -504,8 +473,8 @@ def _numpy_or_bottleneck_reduce( ): if "dim" in kwargs: warnings.warn( - f"Reductions will be applied along the rolling dimension " - f"'{self.window_dims}'. Passing the 'dim' kwarg to reduction " + f"Reductions are applied along the rolling dimension " + f"'{self.dim}'. Passing the 'dim' kwarg to reduction " f"operations has no effect.", DeprecationWarning, stacklevel=3, @@ -515,7 +484,7 @@ def _numpy_or_bottleneck_reduce( if ( bottleneck_move_func is not None and not is_duck_dask_array(self.obj.data) - and len(self.window_dims) == 1 + and len(self.dim) == 1 ): # TODO: renable bottleneck with dask after the issues # underlying https://github.com/pydata/xarray/issues/2940 are @@ -562,28 +531,22 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None DataArray.groupby """ super().__init__(obj, windows, min_periods, center, keep_attrs) - - if any(d not in self.obj.dims for d in self.window_dims): - raise KeyError(self.window_dims) + if any(d not in self.obj.dims for d in self.dim): + raise KeyError(self.dim) # Keep a rolling object for each DataArray in a dictionary self.rollings = {} for key, da in self.obj.data_vars.items(): - # keeps rollings only for the dataset depending on self._window_dims + # keeps rollings only for the dataset depending on self.dim dims, center = [], {} - for i, d in enumerate(self.window_dims): + for i, d in enumerate(self.dim): if d in da.dims: dims.append(d) center[d] = self.center[i] if len(dims) > 0: w = {d: windows[d] for d in dims} - self.rollings[key] = DataArrayRolling( - da, - w, - min_periods, - center, - ) + self.rollings[key] = DataArrayRolling(da, w, min_periods, center) def _dataset_implementation(self, func, keep_attrs, **kwargs): from .dataset import Dataset @@ -592,7 +555,7 @@ def _dataset_implementation(self, func, keep_attrs, **kwargs): reduced = {} for key, da in self.obj.data_vars.items(): - if any(d in da.dims for d in self.window_dims): + if any(d in da.dims for d in self.dim): reduced[key] = func(self.rollings[key], keep_attrs=keep_attrs, **kwargs) else: reduced[key] = self.obj[key] @@ -687,7 +650,7 @@ def construct( raise ValueError( "Either window_dim or window_dim_kwargs need to be specified." ) - window_dim = {d: window_dim_kwargs[d] for d in self.window_dims} + window_dim = {d: window_dim_kwargs[d] for d in self.dim} window_dim = self._mapping_to_list( window_dim, allow_default=False, allow_allsame=False @@ -696,17 +659,11 @@ def construct( dataset = {} for key, da in self.obj.data_vars.items(): - # keeps rollings only for the dataset depending on self.window_dims - dims = [d for d in self.window_dims if d in da.dims] + # keeps rollings only for the dataset depending on self.dim + dims = [d for d in self.dim if d in da.dims] if len(dims) > 0: - wi = { - d: window_dim[i] - for i, d in enumerate(self.window_dims) - if d in da.dims - } - st = { - d: stride[i] for i, d in enumerate(self.window_dims) if d in da.dims - } + wi = {d: window_dim[i] for i, d in enumerate(self.dim) if d in da.dims} + st = {d: stride[i] for i, d in enumerate(self.dim) if d in da.dims} dataset[key] = self.rollings[key].construct( window_dim=wi, fill_value=fill_value, stride=st @@ -721,7 +678,7 @@ def construct( attrs = self.obj.attrs if keep_attrs else {} return Dataset(dataset, coords=self.obj.coords, attrs=attrs).isel( - **{d: slice(None, None, s) for d, s in zip(self.window_dims, stride)} + **{d: slice(None, None, s) for d, s in zip(self.dim, stride)} ) From 896513f987d4616bbb37bc8fffb85b5297554d4f Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 26 Oct 2020 10:34:02 +0100 Subject: [PATCH 10/16] some more fixes --- doc/whats-new.rst | 2 +- xarray/core/rolling.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4d5f3c284f3..8a2c7a369c8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,7 +55,7 @@ Bug fixes By `Mathias Hauser `_. - :py:func:`combine_by_coords` now raises an informative error when passing coordinates with differing calendars (:issue:`4495`). By `Mathias Hauser `_. -- :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` now also keep the attributes of of (wrapped) +- :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` now also keep the attributes and names of of (wrapped) ``DataArray`` objects, previously only the global attributes were retained (:issue:`4497`, :pull:`4510`). By `Mathias Hauser `_. diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 85e7b7b62bf..43e8a91e651 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -90,9 +90,9 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None if keep_attrs is not None: warnings.warn( - "Passing ``keep_attrs`` to ``rolling`` is deprecated and will raise an error" - " in xarray 0.18. Please pass ``keep_attrs`` directly to the applied function." - " Note that keep_attrs is now True per default.", + "Passing ``keep_attrs`` to ``rolling`` is deprecated and will raise an" + " error in xarray 0.18. Please pass ``keep_attrs`` directly to the" + " applied function. Note that keep_attrs is now True per default.", FutureWarning, ) self.keep_attrs = keep_attrs @@ -139,6 +139,7 @@ def method(self, keep_attrs=None, **kwargs): median = _reduce_method("median") def count(self, keep_attrs=None): + keep_attrs = self._get_keep_attrs(keep_attrs) rolling_count = self._counts(keep_attrs=keep_attrs) enough_periods = rolling_count >= self.min_periods return rolling_count.where(enough_periods) @@ -473,7 +474,7 @@ def _numpy_or_bottleneck_reduce( ): if "dim" in kwargs: warnings.warn( - f"Reductions are applied along the rolling dimension " + f"Reductions are applied along the rolling dimension(s) " f"'{self.dim}'. Passing the 'dim' kwarg to reduction " f"operations has no effect.", DeprecationWarning, @@ -533,8 +534,7 @@ def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None super().__init__(obj, windows, min_periods, center, keep_attrs) if any(d not in self.obj.dims for d in self.dim): raise KeyError(self.dim) - - # Keep a rolling object for each DataArray in a dictionary + # Keep each Rolling object as a dictionary self.rollings = {} for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on self.dim From e9de7d5e56fc969ad3d53c450519a18ee0032b5e Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 26 Oct 2020 10:41:29 +0100 Subject: [PATCH 11/16] more doc fixes --- xarray/core/rolling.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 43e8a91e651..5dfd0a04903 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -24,8 +24,8 @@ ---------- keep_attrs : bool, default: None If True, the attributes (``attrs``) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. If None uses the global default. + object to the new one. If False, the new object will be returned + without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `{name}`. @@ -257,8 +257,8 @@ def construct( Filling value to match the dimension size. keep_attrs : bool, default: None If True, the attributes (``attrs``) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. If None uses the global default. + object to the new one. If False, the new object will be returned + without attributes. If None uses the global default. **window_dim_kwargs : {dim: new_name, ...}, optional The keyword arguments form of ``window_dim``. @@ -346,8 +346,8 @@ def reduce(self, func, keep_attrs=None, **kwargs): np.ndarray over an the rolling dimension. keep_attrs : bool, default: None If True, the attributes (``attrs``) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. If None uses the global default. + object to the new one. If False, the new object will be returned + without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -578,8 +578,8 @@ def reduce(self, func, keep_attrs=None, **kwargs): np.ndarray over an the rolling dimension. keep_attrs : bool, default: None If True, the attributes (``attrs``) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. If None uses the global default. + object to the new one. If False, the new object will be returned + without attributes. If None uses the global default. **kwargs : dict Additional keyword arguments passed on to `func`. From 3596208748f878b5e17b2fa361d267d02e187eaa Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 30 Oct 2020 11:53:06 +0100 Subject: [PATCH 12/16] test the name is conserved --- xarray/tests/test_dataarray.py | 8 ++++---- xarray/tests/test_dataset.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 1b92034919b..efd27d53cc3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6558,27 +6558,27 @@ def test_rolling_keep_attrs(funcname, argument): coords = np.linspace(1, 10, 100) da = DataArray( - data, - dims=("coord"), - coords={"coord": coords}, - attrs=attrs_da, + data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da, name="name" ) # attrs are now kept per default func = getattr(da.rolling(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == attrs_da + assert result.name == "name" # discard attrs func = getattr(da.rolling(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} + assert result.name == "name" # test discard attrs using global option func = getattr(da.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument) assert result.attrs == {} + assert result.name == "name" def test_rolling_keep_attrs_deprecated(): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 2e6e2f69554..53093e77ab8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6025,14 +6025,18 @@ def test_rolling_keep_attrs(funcname, argument): result = func(*argument) assert result.attrs == global_attrs assert result.da.attrs == da_attrs - assert ds.da_not_rolled.attrs == da_not_rolled_attrs + assert result.da_not_rolled.attrs == da_not_rolled_attrs + assert result.da.name == "da" + assert result.da_not_rolled.name == "da_not_rolled" # discard attrs func = getattr(ds.rolling(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.da.attrs == {} - assert ds.da_not_rolled.attrs == {} + assert result.da_not_rolled.attrs == {} + assert result.da.name == "da" + assert result.da_not_rolled.name == "da_not_rolled" # test discard attrs using global option func = getattr(ds.rolling(dim={"coord": 5}), funcname) @@ -6041,7 +6045,9 @@ def test_rolling_keep_attrs(funcname, argument): assert result.attrs == {} assert result.da.attrs == {} - assert ds.da_not_rolled.attrs == {} + assert result.da_not_rolled.attrs == {} + assert result.da.name == "da" + assert result.da_not_rolled.name == "da_not_rolled" def test_rolling_keep_attrs_deprecated(): From 8ce9044192c4e36614e4151827f4d1716a7285a4 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 30 Oct 2020 16:29:12 +0100 Subject: [PATCH 13/16] test global default and kwarg --- xarray/tests/test_dataarray.py | 13 +++++++++++++ xarray/tests/test_dataset.py | 21 +++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index efd27d53cc3..e944c020503 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6580,6 +6580,19 @@ def test_rolling_keep_attrs(funcname, argument): assert result.attrs == {} assert result.name == "name" + # keyword takes precedence over global option + func = getattr(da.rolling(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument, keep_attrs=True) + assert result.attrs == attrs_da + assert result.name == "name" + + func = getattr(da.rolling(dim={"coord": 5}), funcname) + with set_options(keep_attrs=True): + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + assert result.name == "name" + def test_rolling_keep_attrs_deprecated(): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 53093e77ab8..d5483423140 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6049,6 +6049,27 @@ def test_rolling_keep_attrs(funcname, argument): assert result.da.name == "da" assert result.da_not_rolled.name == "da_not_rolled" + # keyword takes precedence over global option + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument, keep_attrs=True) + + assert result.attrs == global_attrs + assert result.da.attrs == da_attrs + assert result.da_not_rolled.attrs == da_not_rolled_attrs + assert result.da.name == "da" + assert result.da_not_rolled.name == "da_not_rolled" + + func = getattr(ds.rolling(dim={"coord": 5}), funcname) + with set_options(keep_attrs=True): + result = func(*argument, keep_attrs=False) + + assert result.attrs == {} + assert result.da.attrs == {} + assert result.da_not_rolled.attrs == {} + assert result.da.name == "da" + assert result.da_not_rolled.name == "da_not_rolled" + def test_rolling_keep_attrs_deprecated(): global_attrs = {"units": "test", "long_name": "testing"} From 54371dfe8bbf49164bec160b57eebe5216e1664e Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 30 Oct 2020 16:29:54 +0100 Subject: [PATCH 14/16] more fixes... --- xarray/core/rolling.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 5dfd0a04903..9c5af2f14c9 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -393,7 +393,7 @@ def reduce(self, func, keep_attrs=None, **kwargs): d: utils.get_temp_dimname(self.obj.dims, f"_rolling_dim_{d}") for d in self.dim } - windows = self.construct(rolling_dim) + windows = self.construct(rolling_dim, keep_attrs=keep_attrs) result = windows.reduce( func, dim=list(rolling_dim.values()), keep_attrs=keep_attrs, **kwargs ) @@ -414,7 +414,7 @@ def _counts(self, keep_attrs): # The use of skipna==False is also faster since it does not need to # copy the strided array. counts = ( - self.obj.notnull() + self.obj.notnull(keep_attrs=keep_attrs) .rolling( center={d: self.center[i] for i, d in enumerate(self.dim)}, **{d: w for d, w in zip(self.dim, self.window)}, @@ -558,7 +558,7 @@ def _dataset_implementation(self, func, keep_attrs, **kwargs): if any(d in da.dims for d in self.dim): reduced[key] = func(self.rollings[key], keep_attrs=keep_attrs, **kwargs) else: - reduced[key] = self.obj[key] + reduced[key] = self.obj[key].copy(deep=False) # we need to delete the attrs of the copied DataArray if not keep_attrs: reduced[key].attrs = {} @@ -666,10 +666,13 @@ def construct( st = {d: stride[i] for i, d in enumerate(self.dim) if d in da.dims} dataset[key] = self.rollings[key].construct( - window_dim=wi, fill_value=fill_value, stride=st + window_dim=wi, + fill_value=fill_value, + stride=st, + keep_attrs=keep_attrs, ) else: - dataset[key] = da + dataset[key] = da.copy(deep=False) # as the DataArrays can be copied we need to delete the attrs if not keep_attrs: From 1ee45f537e1cf97c075e6a75c0beb67584e405a1 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 30 Oct 2020 16:38:00 +0100 Subject: [PATCH 15/16] do a deep copy --- xarray/core/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 9c5af2f14c9..38cb11b55ff 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -558,7 +558,7 @@ def _dataset_implementation(self, func, keep_attrs, **kwargs): if any(d in da.dims for d in self.dim): reduced[key] = func(self.rollings[key], keep_attrs=keep_attrs, **kwargs) else: - reduced[key] = self.obj[key].copy(deep=False) + reduced[key] = self.obj[key].copy() # we need to delete the attrs of the copied DataArray if not keep_attrs: reduced[key].attrs = {} @@ -672,7 +672,7 @@ def construct( keep_attrs=keep_attrs, ) else: - dataset[key] = da.copy(deep=False) + dataset[key] = da.copy() # as the DataArrays can be copied we need to delete the attrs if not keep_attrs: From 502ca77a9be87e51f7bdc2e8464d91d2b7a7d58c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Nov 2020 15:02:56 +0100 Subject: [PATCH 16/16] Apply suggestions from code review --- xarray/core/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/core/common.py b/xarray/core/common.py index 06911d4a221..7078a4c1604 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -577,10 +577,13 @@ def pipe( >>> def adder(data, arg): ... return data + arg + ... >>> def div(data, arg): ... return data / arg + ... >>> def sub_mult(data, sub_arg, mult_arg): ... return (data * mult_arg) - sub_arg + ... >>> x.pipe(adder, 2) Dimensions: (lat: 2, lon: 2)