Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Coarsen keep attrs 3376 #3801

Merged
merged 18 commits into from
Mar 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ Bug fixes
- xarray now respects the over, under and bad colors if set on a provided colormap.
(:issue:`3590`, :pull:`3601`)
By `johnomotani <https://github.com/johnomotani>`_.
- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)``
to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword
argument ``keep_attrs`` to change this setting. (:issue:`3376`,
:pull:`3801`) By `Andrew Thomas <https://github.com/amcnicho>`_.

- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group`
simultaneously. (:issue:`3170`). By `Matthias Meyer <https://github.com/niowniow>`_.

Expand Down
29 changes: 26 additions & 3 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,7 @@ def rolling(
dim: Mapping[Hashable, int] = None,
min_periods: int = None,
center: bool = False,
keep_attrs: bool = None,
**window_kwargs: int,
):
"""
Expand All @@ -769,6 +770,10 @@ def rolling(
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**window_kwargs : optional
The keyword arguments form of ``dim``.
One of dim or window_kwargs must be provided.
Expand Down Expand Up @@ -810,8 +815,13 @@ def rolling(
core.rolling.DataArrayRolling
core.rolling.DatasetRolling
"""
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

dim = either_dict_or_kwargs(dim, window_kwargs, "rolling")
return self._rolling_cls(self, dim, min_periods=min_periods, center=center)
return self._rolling_cls(
self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs
)

def rolling_exp(
self,
Expand Down Expand Up @@ -859,6 +869,7 @@ def coarsen(
boundary: str = "exact",
side: Union[str, Mapping[Hashable, str]] = "left",
coord_func: str = "mean",
keep_attrs: bool = None,
**window_kwargs: int,
):
"""
Expand All @@ -879,8 +890,12 @@ def coarsen(
multiple of the window size. If 'trim', the excess entries are
dropped. If 'pad', NA will be padded.
side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
coord_func : function (name) that is applied to the coordintes,
coord_func : function (name) that is applied to the coordinates,
or a mapping from coordinate name to function (name).
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand Down Expand Up @@ -915,9 +930,17 @@ def coarsen(
core.rolling.DataArrayCoarsen
core.rolling.DatasetCoarsen
"""
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
max-sixty marked this conversation as resolved.
Show resolved Hide resolved

dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen")
return self._coarsen_cls(
self, dim, boundary=boundary, side=side, coord_func=coord_func
self,
dim,
boundary=boundary,
side=side,
coord_func=coord_func,
keep_attrs=keep_attrs,
)

def resample(
Expand Down
67 changes: 54 additions & 13 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from . import dtypes, duck_array_ops, utils
from .dask_array_ops import dask_rolling_wrapper
from .ops import inject_reduce_methods
from .options import _get_keep_attrs
from .pycompat import dask_array_type

try:
Expand Down Expand Up @@ -42,10 +43,10 @@ class Rolling:
DataArray.rolling
"""

__slots__ = ("obj", "window", "min_periods", "center", "dim")
_attributes = ("window", "min_periods", "center", "dim")
__slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs")
_attributes = ("window", "min_periods", "center", "dim", "keep_attrs")

def __init__(self, obj, windows, min_periods=None, center=False):
def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
"""
Moving window object.
Expand All @@ -65,6 +66,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand All @@ -89,6 +94,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
self.center = center
self.dim = dim

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
self.keep_attrs = keep_attrs

@property
def _min_periods(self):
return self.min_periods if self.min_periods is not None else self.window
Expand Down Expand Up @@ -143,7 +152,7 @@ def count(self):
class DataArrayRolling(Rolling):
__slots__ = ("window_labels",)

def __init__(self, obj, windows, min_periods=None, center=False):
def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
"""
Moving window object for DataArray.
You should use DataArray.rolling() method to construct this object
Expand All @@ -165,6 +174,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand All @@ -177,7 +190,11 @@ def __init__(self, obj, windows, min_periods=None, center=False):
Dataset.rolling
Dataset.groupby
"""
super().__init__(obj, windows, min_periods=min_periods, center=center)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
super().__init__(
obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs
)

self.window_labels = self.obj[self.dim]

Expand Down Expand Up @@ -374,7 +391,7 @@ def _numpy_or_bottleneck_reduce(
class DatasetRolling(Rolling):
__slots__ = ("rollings",)

def __init__(self, obj, windows, min_periods=None, center=False):
def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
"""
Moving window object for Dataset.
You should use Dataset.rolling() method to construct this object
Expand All @@ -396,6 +413,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
setting min_periods equal to the size of the window.
center : boolean, default False
Set the labels at the center of the window.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
Returns
-------
Expand All @@ -408,15 +429,17 @@ def __init__(self, obj, windows, min_periods=None, center=False):
Dataset.groupby
DataArray.groupby
"""
super().__init__(obj, windows, min_periods, center)
super().__init__(obj, windows, min_periods, center, keep_attrs)
if self.dim not in self.obj.dims:
raise KeyError(self.dim)
# Keep each Rolling object as a dictionary
self.rollings = {}
for key, da in self.obj.data_vars.items():
# keeps rollings only for the dataset depending on slf.dim
if self.dim in da.dims:
self.rollings[key] = DataArrayRolling(da, windows, min_periods, center)
self.rollings[key] = DataArrayRolling(
da, windows, min_periods, center, keep_attrs
)

def _dataset_implementation(self, func, **kwargs):
from .dataset import Dataset
Expand All @@ -427,7 +450,8 @@ def _dataset_implementation(self, func, **kwargs):
reduced[key] = func(self.rollings[key], **kwargs)
else:
reduced[key] = self.obj[key]
return Dataset(reduced, coords=self.obj.coords)
attrs = self.obj.attrs if self.keep_attrs else {}
return Dataset(reduced, coords=self.obj.coords, attrs=attrs)

def reduce(self, func, **kwargs):
"""Reduce the items in this group by applying `func` along some
Expand Down Expand Up @@ -466,7 +490,7 @@ def _numpy_or_bottleneck_reduce(
**kwargs,
)

def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
def construct(self, window_dim, stride=1, fill_value=dtypes.NA, keep_attrs=None):
"""
Convert this rolling object to xr.Dataset,
where the window dimension is stacked as a new dimension
Expand All @@ -487,6 +511,9 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA):

from .dataset import Dataset

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=True)

dataset = {}
for key, da in self.obj.data_vars.items():
if self.dim in da.dims:
Expand All @@ -509,10 +536,18 @@ class Coarsen:
DataArray.coarsen
"""

__slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess")
__slots__ = (
"obj",
"boundary",
"coord_func",
"windows",
"side",
"trim_excess",
"keep_attrs",
)
_attributes = ("windows", "side", "trim_excess")

def __init__(self, obj, windows, boundary, side, coord_func):
def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs):
"""
Moving window object.
Expand Down Expand Up @@ -541,6 +576,7 @@ def __init__(self, obj, windows, boundary, side, coord_func):
self.windows = windows
self.side = side
self.boundary = boundary
self.keep_attrs = keep_attrs

absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
if absent_dims:
Expand Down Expand Up @@ -626,6 +662,11 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
def wrapped_func(self, **kwargs):
from .dataset import Dataset

if self.keep_attrs:
attrs = self.obj.attrs
else:
attrs = {}

reduced = {}
for key, da in self.obj.data_vars.items():
reduced[key] = da.variable.coarsen(
Expand All @@ -644,7 +685,7 @@ def wrapped_func(self, **kwargs):
)
else:
coords[c] = v.variable
return Dataset(reduced, coords=coords)
return Dataset(reduced, coords=coords, attrs=attrs)

return wrapped_func

Expand Down
3 changes: 3 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,9 @@ def _coarsen_reshape(self, windows, boundary, side):
else:
shape.append(variable.shape[i])

keep_attrs = _get_keep_attrs(default=False)
variable.attrs = variable._attrs if keep_attrs else {}

return variable.data.reshape(shape), tuple(axes)

@property
Expand Down
56 changes: 56 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5664,6 +5664,62 @@ def test_coarsen_coords_cftime():
np.testing.assert_array_equal(actual.time, expected_times)


def test_coarsen_keep_attrs():
_attrs = {"units": "test", "long_name": "testing"}

var1 = np.linspace(10, 15, 100)
var2 = np.linspace(5, 10, 100)
coords = np.linspace(1, 10, 100)

ds = Dataset(
data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
coords={"coord": coords},
max-sixty marked this conversation as resolved.
Show resolved Hide resolved
attrs=_attrs,
)

# Test dropped attrs
dat = ds.coarsen(coord=5).mean()
assert dat.attrs == {}

# Test kept attrs using dataset keyword
dat = ds.coarsen(coord=5, keep_attrs=True).mean()
assert dat.attrs == _attrs

# Test kept attrs using global option
with set_options(keep_attrs=True):
dat = ds.coarsen(coord=5).mean()
assert dat.attrs == _attrs


def test_rolling_keep_attrs():
_attrs = {"units": "test", "long_name": "testing"}

var1 = np.linspace(10, 15, 100)
var2 = np.linspace(5, 10, 100)
coords = np.linspace(1, 10, 100)

ds = Dataset(
data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
coords={"coord": coords},
attrs=_attrs,
)

# Test dropped attrs
dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
assert dat.attrs == {}

# Test kept attrs using dataset keyword
dat = ds.rolling(
dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True
).mean()
assert dat.attrs == _attrs

# Test kept attrs using global option
with set_options(keep_attrs=True):
dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
assert dat.attrs == _attrs


def test_rolling_properties(ds):
# catching invalid args
with pytest.raises(ValueError, match="exactly one dim/window should"):
Expand Down
22 changes: 21 additions & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pytz

from xarray import Coordinate, Dataset, IndexVariable, Variable, set_options
from xarray.core import dtypes, indexing
from xarray.core import dtypes, duck_array_ops, indexing
from xarray.core.common import full_like, ones_like, zeros_like
from xarray.core.indexing import (
BasicIndexer,
Expand Down Expand Up @@ -1879,6 +1879,26 @@ def test_coarsen_2d(self):
expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
assert_equal(actual, expected)

# perhaps @pytest.mark.parametrize("operation", [f for f in duck_array_ops])
def test_coarsen_keep_attrs(self, operation="mean"):
_attrs = {"units": "test", "long_name": "testing"}

test_func = getattr(duck_array_ops, operation, None)

# Test dropped attrs
with set_options(keep_attrs=False):
new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
windows={"coord": 1}, func=test_func, boundary="exact", side="left"
)
assert new.attrs == {}

# Test kept attrs
with set_options(keep_attrs=True):
new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
windows={"coord": 1}, func=test_func, boundary="exact", side="left"
)
assert new.attrs == _attrs


@requires_dask
class TestVariableWithDask(VariableSubclassobjects):
Expand Down