diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3c3bf127a3f..c835fbeff45 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -80,6 +80,8 @@ New Features invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. +- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c73ee3cf7c5..38ecc04534a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -557,6 +557,59 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + """Compute the qth quantile over each array in the groups and + concatenate them together into a new array. + + Parameters + ---------- + q : float in range of [0,1] (or sequence of floats) + Quantile to compute, which must be between 0 and 1 + inclusive. + dim : `...`, str or sequence of str, optional + Dimension(s) over which to apply quantile. + Defaults to the grouped dimension. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + + Returns + ------- + quantiles : Variable + If `q` is a single quantile, then the result is a + scalar. If multiple percentiles are given, first axis of + the result corresponds to the quantile. In either case a + quantile dimension is added to the return array. The other + dimensions are the dimensions that remain after the + reduction of the array. + + See Also + -------- + numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, + DataArray.quantile + """ + if dim is None: + dim = self._group_dim + + out = self.map( + self._obj.__class__.quantile, + shortcut=False, + q=q, + dim=dim, + interpolation=interpolation, + keep_attrs=keep_attrs, + ) + + return out + def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. @@ -737,60 +790,6 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): combined = self._maybe_unstack(combined) return combined - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): - """Compute the qth quantile over each array in the groups and - concatenate them together into a new array. - - Parameters - ---------- - q : float in range of [0,1] (or sequence of floats) - Quantile to compute, which must be between 0 and 1 - inclusive. - dim : `...`, str or sequence of str, optional - Dimension(s) over which to apply quantile. - Defaults to the grouped dimension. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. - - Returns - ------- - quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. - - See Also - -------- - numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, - DataArray.quantile - """ - if dim is None: - dim = self._group_dim - - out = self.map( - self._obj.__class__.quantile, - shortcut=False, - q=q, - dim=dim, - interpolation=interpolation, - keep_attrs=keep_attrs, - ) - - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") - return out - def reduce( self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs ): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 581affa3471..97bd31ae050 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -137,42 +137,58 @@ def test_da_groupby_empty(): def test_da_groupby_quantile(): - array = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]) + array = xr.DataArray( + data=[1, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) # Scalar quantile - expected = xr.DataArray([2, 5], [("x", [1, 2])]) + expected = xr.DataArray( + data=[2, 5], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) actual = array.groupby("x").quantile(0.5) assert_identical(expected, actual) # Vector quantile - expected = xr.DataArray([[1, 3], [4, 6]], [("x", [1, 2]), ("quantile", [0, 1])]) + expected = xr.DataArray( + data=[[1, 3], [4, 6]], + coords={"x": [1, 2], "quantile": [0, 1]}, + dims=("x", "quantile"), + ) actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) # Multiple dimensions array = xr.DataArray( - [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], + data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + dims=("x", "y"), ) actual_x = array.groupby("x").quantile(0, dim=...) - expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) + expected_x = xr.DataArray( + data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" + ) assert_identical(expected_x, actual_x) actual_y = array.groupby("y").quantile(0, dim=...) - expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) + expected_y = xr.DataArray( + data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" + ) assert_identical(expected_y, actual_y) actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( - [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] + data=[[1, 11, 22], [4, 15, 24]], + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_xx, actual_xx) actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( - [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], + data=[[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_yy, actual_yy) @@ -180,14 +196,14 @@ def test_da_groupby_quantile(): x = [0, 1] foo = xr.DataArray( np.reshape(np.arange(365 * 2), (365, 2)), - coords=dict(time=times, x=x), + coords={"time": times, "x": x}, dims=("time", "x"), ) g = foo.groupby(foo.time.dt.month) actual = g.quantile(0, dim=...) expected = xr.DataArray( - [ + data=[ 0.0, 62.0, 120.0, @@ -201,12 +217,111 @@ def test_da_groupby_quantile(): 610.0, 670.0, ], - [("month", np.arange(1, 13))], + coords={"month": np.arange(1, 13), "quantile": 0}, + dims="month", ) assert_identical(expected, actual) actual = g.quantile(0, dim="time")[:2] - expected = xr.DataArray([[0.0, 1], [62.0, 63]], [("month", [1, 2]), ("x", [0, 1])]) + expected = xr.DataArray( + data=[[0.0, 1], [62.0, 63]], + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + dims=("month", "x"), + ) + assert_identical(expected, actual) + + +def test_ds_groupby_quantile(): + ds = xr.Dataset( + data_vars={"a": ("x", [1, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]} + ) + + # Scalar quantile + expected = xr.Dataset( + data_vars={"a": ("x", [2, 5])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5) + assert_identical(expected, actual) + + # Vector quantile + expected = xr.Dataset( + data_vars={"a": (("x", "quantile"), [[1, 3], [4, 6]])}, + coords={"x": [1, 2], "quantile": [0, 1]}, + ) + actual = ds.groupby("x").quantile([0, 1]) + assert_identical(expected, actual) + + # Multiple dimensions + ds = xr.Dataset( + data_vars={ + "a": ( + ("x", "y"), + [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + ) + }, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + ) + + actual_x = ds.groupby("x").quantile(0, dim=...) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) + assert_identical(expected_x, actual_x) + + actual_y = ds.groupby("y").quantile(0, dim=...) + expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) + assert_identical(expected_y, actual_y) + + actual_xx = ds.groupby("x").quantile(0) + expected_xx = xr.Dataset( + {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + ) + assert_identical(expected_xx, actual_xx) + + actual_yy = ds.groupby("y").quantile(0) + expected_yy = xr.Dataset( + {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + ).transpose() + assert_identical(expected_yy, actual_yy) + + times = pd.date_range("2000-01-01", periods=365) + x = [0, 1] + foo = xr.Dataset( + {"a": (("time", "x"), np.reshape(np.arange(365 * 2), (365, 2)))}, + coords=dict(time=times, x=x), + ) + g = foo.groupby(foo.time.dt.month) + + actual = g.quantile(0, dim=...) + expected = xr.Dataset( + { + "a": ( + "month", + [ + 0.0, + 62.0, + 120.0, + 182.0, + 242.0, + 304.0, + 364.0, + 426.0, + 488.0, + 548.0, + 610.0, + 670.0, + ], + ) + }, + coords={"month": np.arange(1, 13), "quantile": 0}, + ) + assert_identical(expected, actual) + + actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) + expected = xr.Dataset( + data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + ) assert_identical(expected, actual)