diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 52fa102f7fa..373cb8d13dc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,8 @@ Enhancements ~~~~~~~~~~~~ +- Add ``keepdims`` argument for reduce operations (:issue:`2170`) + By `Scott Wales <https://github.com/ScottWales>`_. - netCDF chunksizes are now only dropped when original_shape is different, not when it isn't found. (:issue:`2207`) By `Karel van de Plassche <https://github.com/Karel-van-de-Plassche>`_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4c3dcc2781a..ff77a6ab704 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -259,8 +259,14 @@ def _replace(self, variable=None, coords=None, name=__default): return type(self)(variable, coords, name=name, fastpath=True) def _replace_maybe_drop_dims(self, variable, name=__default): - if variable.dims == self.dims: + if variable.dims == self.dims and variable.shape == self.shape: coords = self._coords.copy() + elif variable.dims == self.dims: + # Shape has changed (e.g. from reduce(..., keepdims=True)) + new_sizes = dict(zip(self.dims, variable.shape)) + coords = OrderedDict((k, v) for k, v in self._coords.items() + if v.shape == tuple(new_sizes[d] + for d in v.dims)) else: allowed_dims = set(variable.dims) coords = OrderedDict((k, v) for k, v in self._coords.items() @@ -1642,7 +1648,8 @@ def combine_first(self, other): """ return ops.fillna(self, other, join="outer") - def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs): + def reduce(self, func, dim=None, axis=None, keep_attrs=None, + keepdims=False, **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -1662,6 +1669,10 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs): If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. 
+ keepdims : bool, default False + If True, the dimensions which are reduced are left in the result + as dimensions of size one. Coordinates that use these dimensions + are removed. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -1672,7 +1683,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs): summarized data and the indicated dimension(s) removed. """ - var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs) + var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, + **kwargs) return self._replace_maybe_drop_dims(var) def to_pandas(self): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 13a6a6ee9b2..3e00640ba60 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3152,8 +3152,8 @@ def combine_first(self, other): out = ops.fillna(self, other, join="outer", dataset_join="outer") return out - def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False, - allow_lazy=False, **kwargs): + def reduce(self, func, dim=None, keep_attrs=None, keepdims=False, + numeric_only=False, allow_lazy=False, **kwargs): """Reduce this dataset by applying `func` along some dimension(s). Parameters @@ -3169,6 +3169,10 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False, If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + keepdims : bool, default False + If True, the dimensions which are reduced are left in the result + as dimensions of size one. Coordinates that use these dimensions + are removed. numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. 
**kwargs : dict @@ -3218,6 +3222,7 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False, reduce_dims = None variables[name] = var.reduce(func, dim=reduce_dims, keep_attrs=keep_attrs, + keepdims=keepdims, allow_lazy=allow_lazy, **kwargs) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 41f8795b595..ab1be181e31 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1334,7 +1334,7 @@ def where(self, cond, other=dtypes.NA): return ops.where_method(self, cond, other) def reduce(self, func, dim=None, axis=None, - keep_attrs=None, allow_lazy=False, **kwargs): + keep_attrs=None, keepdims=False, allow_lazy=False, **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -1354,6 +1354,9 @@ def reduce(self, func, dim=None, axis=None, If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + keepdims : bool, default False + If True, the dimensions which are reduced are left in the result + as dimensions of size one. **kwargs : dict Additional keyword arguments passed on to `func`. 
@@ -1381,8 +1384,19 @@ def reduce(self, func, dim=None, axis=None, else: removed_axes = (range(self.ndim) if axis is None else np.atleast_1d(axis) % self.ndim) - dims = [adim for n, adim in enumerate(self.dims) - if n not in removed_axes] + if keepdims: + # Insert np.newaxis for removed dims + slices = tuple(np.newaxis if i in removed_axes else + slice(None, None) for i in range(self.ndim)) + if getattr(data, 'shape', None) is None: + # Reduce has produced a scalar value, not an array-like + data = np.asanyarray(data)[slices] + else: + data = data[slices] + dims = self.dims + else: + dims = [adim for n, adim in enumerate(self.dims) + if n not in removed_axes] if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a8825055479..47222194151 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1991,6 +1991,44 @@ def test_reduce(self): dims=['x', 'y']).mean('x') assert_equal(actual, expected) + def test_reduce_keepdims(self): + coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'], + 'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]), + 'c': -999} + orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y']) + + # Mean on all axes loses non-constant coordinates + actual = orig.mean(keepdims=True) + expected = DataArray(orig.data.mean(keepdims=True), dims=orig.dims, + coords={k: v for k, v in coords.items() + if k in ['c']}) + assert_equal(actual, expected) + + assert actual.sizes['x'] == 1 + assert actual.sizes['y'] == 1 + + # Mean on specific axes loses coordinates involving that axis + actual = orig.mean('y', keepdims=True) + expected = DataArray(orig.data.mean(axis=1, keepdims=True), + dims=orig.dims, + coords={k: v for k, v in coords.items() + if k not in ['y', 'lat']}) + assert_equal(actual, expected) + + @requires_bottleneck + def test_reduce_keepdims_bottleneck(self): + import bottleneck + + coords = {'x': [-1, -2], 'y': ['ab', 'cd', 
'ef'], + 'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]), + 'c': -999} + orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y']) + + # Bottleneck does not have its own keepdims implementation + actual = orig.reduce(bottleneck.nanmean, keepdims=True) + expected = orig.mean(keepdims=True) + assert_equal(actual, expected) + def test_reduce_dtype(self): coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'], 'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]), diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 8cd129e35de..e3a01bbd3a1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3898,6 +3898,25 @@ def total_sum(x): with raises_regex(TypeError, "unexpected keyword argument 'axis'"): ds.reduce(total_sum, dim='x') + def test_reduce_keepdims(self): + ds = Dataset({'a': (['x', 'y'], [[0, 1, 2, 3, 4]])}, + coords={'y': [0, 1, 2, 3, 4], 'x': [0], + 'lat': (['x', 'y'], [[0, 1, 2, 3, 4]]), + 'c': -999.0}) + + # Shape should match behaviour of numpy reductions with keepdims=True + # Coordinates involved in the reduction should be removed + actual = ds.mean(keepdims=True) + expected = Dataset({'a': (['x', 'y'], np.mean(ds.a, keepdims=True))}, + coords={'c': ds.c}) + assert_identical(expected, actual) + + actual = ds.mean('x', keepdims=True) + expected = Dataset({'a': (['x', 'y'], + np.mean(ds.a, axis=0, keepdims=True))}, + coords={'y': ds.y, 'c': ds.c}) + assert_identical(expected, actual) + def test_quantile(self): ds = create_test_data(seed=123) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 4ddd114d767..5da83880539 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1540,6 +1540,42 @@ def test_reduce_funcs(self): assert_identical( v.max(), Variable([], pd.Timestamp('2000-01-03'))) + def test_reduce_keepdims(self): + v = Variable(['x', 'y'], self.d) + + assert_identical(v.mean(keepdims=True), + Variable(v.dims, np.mean(self.d, keepdims=True))) + 
assert_identical(v.mean(dim='x', keepdims=True), + Variable(v.dims, np.mean(self.d, axis=0, + keepdims=True))) + assert_identical(v.mean(dim='y', keepdims=True), + Variable(v.dims, np.mean(self.d, axis=1, + keepdims=True))) + assert_identical(v.mean(dim=['y', 'x'], keepdims=True), + Variable(v.dims, np.mean(self.d, axis=(1, 0), + keepdims=True))) + + v = Variable([], 1.0) + assert_identical(v.mean(keepdims=True), + Variable([], np.mean(v.data, keepdims=True))) + + @requires_dask + def test_reduce_keepdims_dask(self): + import dask.array + v = Variable(['x', 'y'], self.d).chunk() + + actual = v.mean(keepdims=True) + assert isinstance(actual.data, dask.array.Array) + + expected = Variable(v.dims, np.mean(self.d, keepdims=True)) + assert_identical(actual, expected) + + actual = v.mean(dim='y', keepdims=True) + assert isinstance(actual.data, dask.array.Array) + + expected = Variable(v.dims, np.mean(self.d, axis=1, keepdims=True)) + assert_identical(actual, expected) + def test_reduce_keep_attrs(self): _attrs = {'units': 'test', 'long_name': 'testing'}