Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: keepdims=True for xarray reductions #3033

Merged
merged 7 commits into from
Jun 23, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Enhancements
~~~~~~~~~~~~


- Add ``keepdims`` argument for reduce operations (:issue:`2170`)
By `Scott Wales <https://github.com/ScottWales>`_.
- netCDF chunksizes are now only dropped when original_shape is different,
not when it isn't found. (:issue:`2207`)
By `Karel van de Plassche <https://github.com/Karel-van-de-Plassche>`_.
Expand Down
18 changes: 15 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,14 @@ def _replace(self, variable=None, coords=None, name=__default):
return type(self)(variable, coords, name=name, fastpath=True)

def _replace_maybe_drop_dims(self, variable, name=__default):
if variable.dims == self.dims:
if variable.dims == self.dims and variable.shape == self.shape:
coords = self._coords.copy()
elif variable.dims == self.dims:
# Shape has changed (e.g. from reduce(..., keepdims=True))
new_sizes = dict(zip(self.dims, variable.shape))
coords = OrderedDict((k, v) for k, v in self._coords.items()
if v.shape == tuple(new_sizes[d]
for d in v.dims))
else:
allowed_dims = set(variable.dims)
coords = OrderedDict((k, v) for k, v in self._coords.items()
Expand Down Expand Up @@ -1637,7 +1643,8 @@ def combine_first(self, other):
"""
return ops.fillna(self, other, join="outer")

def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
def reduce(self, func, dim=None, axis=None, keep_attrs=None, keepdims=None,
**kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand All @@ -1657,6 +1664,10 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
keepdims : bool, optional
ScottWales marked this conversation as resolved.
Show resolved Hide resolved
If True, the dimensions which are reduced are left in the result
as dimensions of size one. Coordinates that use these dimensions
are removed.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand All @@ -1667,7 +1678,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
summarized data and the indicated dimension(s) removed.
"""

var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims,
**kwargs)
return self._replace_maybe_drop_dims(var)

def to_pandas(self):
Expand Down
9 changes: 7 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3132,8 +3132,8 @@ def combine_first(self, other):
out = ops.fillna(self, other, join="outer", dataset_join="outer")
return out

def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
allow_lazy=False, **kwargs):
def reduce(self, func, dim=None, keep_attrs=None, keepdims=None,
numeric_only=False, allow_lazy=False, **kwargs):
"""Reduce this dataset by applying `func` along some dimension(s).

Parameters
Expand All @@ -3149,6 +3149,10 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
keepdims : bool, optional
ScottWales marked this conversation as resolved.
Show resolved Hide resolved
If True, the dimensions which are reduced are left in the result
as dimensions of size one. Coordinates that use these dimensions
are removed.
numeric_only : bool, optional
If True, only apply ``func`` to variables with a numeric dtype.
**kwargs : dict
Expand Down Expand Up @@ -3198,6 +3202,7 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
reduce_dims = None
variables[name] = var.reduce(func, dim=reduce_dims,
keep_attrs=keep_attrs,
keepdims=keepdims,
allow_lazy=allow_lazy,
**kwargs)

Expand Down
11 changes: 10 additions & 1 deletion xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,7 +1334,7 @@ def where(self, cond, other=dtypes.NA):
return ops.where_method(self, cond, other)

def reduce(self, func, dim=None, axis=None,
keep_attrs=None, allow_lazy=False, **kwargs):
dcherian marked this conversation as resolved.
Show resolved Hide resolved
keep_attrs=None, keepdims=None, allow_lazy=False, **kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand All @@ -1354,6 +1354,9 @@ def reduce(self, func, dim=None, axis=None,
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
keepdims : bool, optional
ScottWales marked this conversation as resolved.
Show resolved Hide resolved
If True, the dimensions which are reduced are left in the result
as dimensions of size one.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand Down Expand Up @@ -1388,6 +1391,12 @@ def reduce(self, func, dim=None, axis=None,
keep_attrs = _get_keep_attrs(default=False)
attrs = self._attrs if keep_attrs else None

if keepdims:
dcherian marked this conversation as resolved.
Show resolved Hide resolved
for i, d in enumerate(self.dims):
if d not in dims:
dims.insert(i, d)
data = np.expand_dims(data, axis=i)
dcherian marked this conversation as resolved.
Show resolved Hide resolved

return Variable(dims, data, attrs=attrs)

@classmethod
Expand Down
38 changes: 38 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,6 +1976,44 @@ def test_reduce(self):
dims=['x', 'y']).mean('x')
assert_equal(actual, expected)

def test_reduce_keepdims(self):
    """Reductions with ``keepdims=True`` keep reduced dims at size one."""
    coords = {
        'x': [-1, -2],
        'y': ['ab', 'cd', 'ef'],
        'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
        'c': -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y'])

    # Reducing over every axis: of the coordinates, only the scalar 'c'
    # is expected to survive.
    actual = orig.mean(keepdims=True)
    scalar_coords = {name: value for name, value in coords.items()
                     if name == 'c'}
    expected = DataArray(orig.data.mean(keepdims=True),
                         dims=orig.dims, coords=scalar_coords)
    assert_equal(actual, expected)

    # Both dimensions must still be present, each with length one.
    for dim in ('x', 'y'):
        assert actual.sizes[dim] == 1

    # Reducing over 'y' only: coordinates that involve 'y' ('y' itself
    # and 'lat') are expected to be dropped; 'x' and 'c' are kept.
    actual = orig.mean('y', keepdims=True)
    kept_coords = {name: value for name, value in coords.items()
                   if name in ('x', 'c')}
    expected = DataArray(orig.data.mean(axis=1, keepdims=True),
                         dims=orig.dims, coords=kept_coords)
    assert_equal(actual, expected)

@requires_bottleneck
def test_reduce_keepdims_bottleneck(self):
    """``reduce`` with a bottleneck function honours ``keepdims=True``."""
    import bottleneck

    coords = {
        'x': [-1, -2],
        'y': ['ab', 'cd', 'ef'],
        'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
        'c': -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y'])

    # Bottleneck does not have its own keepdims implementation, so the
    # result is compared against the regular ``mean`` reduction.
    reduced_with_bottleneck = orig.reduce(bottleneck.nanmean,
                                          keepdims=True)
    reduced_with_mean = orig.mean(keepdims=True)
    assert_equal(reduced_with_bottleneck, reduced_with_mean)

def test_reduce_dtype(self):
coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
Expand Down
19 changes: 19 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3858,6 +3858,25 @@ def total_sum(x):
with raises_regex(TypeError, "unexpected keyword argument 'axis'"):
ds.reduce(total_sum, dim='x')

def test_reduce_keepdims(self):
    """Dataset reductions with ``keepdims=True`` mirror numpy's shapes."""
    data_vars = {'a': (['x', 'y'], [[0, 1, 2, 3, 4]])}
    coords = {
        'y': [0, 1, 2, 3, 4],
        'x': [0],
        'lat': (['x', 'y'], [[0, 1, 2, 3, 4]]),
        'c': -999.0,
    }
    ds = Dataset(data_vars, coords=coords)

    # Shape should match behaviour of numpy reductions with keepdims=True.
    # Reducing over all dimensions: every coordinate involved in the
    # reduction is expected to be removed, leaving only the scalar 'c'.
    actual = ds.mean(keepdims=True)
    full_mean = np.mean(ds.a, keepdims=True)
    expected = Dataset({'a': (['x', 'y'], full_mean)},
                       coords={'c': ds.c})
    assert_identical(expected, actual)

    # Reducing over 'x' only: 'y' and 'c' are expected to remain, while
    # 'x' and 'lat' (which both involve 'x') are removed.
    actual = ds.mean('x', keepdims=True)
    mean_over_x = np.mean(ds.a, axis=0, keepdims=True)
    expected = Dataset({'a': (['x', 'y'], mean_over_x)},
                       coords={'y': ds.y, 'c': ds.c})
    assert_identical(expected, actual)

def test_quantile(self):

ds = create_test_data(seed=123)
Expand Down
15 changes: 15 additions & 0 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1540,6 +1540,21 @@ def test_reduce_funcs(self):
assert_identical(
v.max(), Variable([], pd.Timestamp('2000-01-03')))

def test_reduce_keepdims(self):
    """``Variable.mean(keepdims=True)`` matches ``np.mean(keepdims=True)``."""
    v = Variable(['x', 'y'], self.d)

    # No dimension given: compare against numpy's reduction over all axes.
    expected = Variable(v.dims, np.mean(self.d, keepdims=True))
    assert_identical(v.mean(keepdims=True), expected)

    # Each entry pairs the ``dim`` argument given to the Variable with the
    # equivalent numpy ``axis`` argument ('x' is axis 0, 'y' is axis 1).
    dim_axis_pairs = [
        ('x', 0),
        ('y', 1),
        (['y', 'x'], (1, 0)),
    ]
    for dim, axis in dim_axis_pairs:
        expected = Variable(v.dims,
                            np.mean(self.d, axis=axis, keepdims=True))
        assert_identical(v.mean(dim=dim, keepdims=True), expected)

def test_reduce_keep_attrs(self):
_attrs = {'units': 'test', 'long_name': 'testing'}

Expand Down