Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add quantile method to GroupBy #2828

Merged
merged 15 commits into from
Jun 24, 2019
3 changes: 2 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ Computation
:py:attr:`~core.groupby.DatasetGroupBy.last`
:py:attr:`~core.groupby.DatasetGroupBy.fillna`
:py:attr:`~core.groupby.DatasetGroupBy.where`
:py:attr:`~core.groupby.DatasetGroupBy.quantile`

Reshaping and reorganizing
--------------------------
Expand Down Expand Up @@ -360,7 +361,7 @@ Computation
:py:attr:`~core.groupby.DataArrayGroupBy.last`
:py:attr:`~core.groupby.DataArrayGroupBy.fillna`
:py:attr:`~core.groupby.DataArrayGroupBy.where`

:py:attr:`~core.groupby.DataArrayGroupBy.quantile`

Reshaping and reorganizing
--------------------------
Expand Down
5 changes: 3 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ v0.12.2 (unreleased)
Enhancements
~~~~~~~~~~~~


- New :py:meth:`~xarray.GroupBy.quantile` method. (:issue:`3018`)
By `David Huard <https://github.com/huard>`_.
- netCDF chunksizes are now only dropped when original_shape is different,
not when it isn't found. (:issue:`2207`)
By `Karel van de Plassche <https://github.com/Karel-van-de-Plassche>`_.
Expand Down Expand Up @@ -76,7 +77,7 @@ Bug fixes
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Fixed performance issues with cftime installed (:issue:`3000`)
By `0x0L <https://github.com/0x0L>`_.
- Replace incorrect usages of `message` in pytest assertions
- Replace incorrect usages of `message` in pytest assertions
with `match` (:issue:`3011`)
By `Maximilian Roos <https://github.com/max-sixty>`_.

Expand Down
49 changes: 49 additions & 0 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,55 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False):
combined = self._maybe_unstack(combined)
return combined

def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
    """Compute the qth quantile over each array in the groups and
    concatenate them together into a new array.

    Parameters
    ----------
    q : float in range of [0,1] (or sequence of floats)
        Quantile to compute, which must be between 0 and 1
        inclusive.
    dim : str or sequence of str, optional
        Dimension(s) over which to apply quantile.
        Defaults to the grouped dimension.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        This optional parameter specifies the interpolation method to
        use when the desired quantile lies between two data points
        ``i < j``:

            * linear: ``i + (j - i) * fraction``, where ``fraction`` is
              the fractional part of the index surrounded by ``i`` and
              ``j``.
            * lower: ``i``.
            * higher: ``j``.
            * nearest: ``i`` or ``j``, whichever is nearest.
            * midpoint: ``(i + j) / 2``.
    keep_attrs : optional
        Forwarded unchanged to the ``quantile`` method of the grouped
        object.

    Returns
    -------
    quantiles
        The per-group ``quantile`` results combined by ``self.apply``.
        If `q` is a single quantile, the ``'quantile'`` entry is dropped
        from the combined result. If multiple quantiles are given, the
        result keeps a ``'quantile'`` dimension holding the requested
        values. The other dimensions are the dimensions that remain
        after the reduction of each group.

    See Also
    --------
    numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
    DataArray.quantile
    """
    if dim is None:
        dim = self._group_dim

    # Call the unbound ``quantile`` of the grouped object's class on every
    # group; ``shortcut=False`` is passed explicitly to ``apply``.
    out = self.apply(self._obj.__class__.quantile, shortcut=False,
                     q=q, dim=dim, interpolation=interpolation,
                     keep_attrs=keep_attrs)

    # A scalar ``q`` (0-d once converted to an array) should not leave a
    # 'quantile' entry behind on the combined result.
    if np.asarray(q, dtype=np.float64).ndim == 0:
        out = out.drop('quantile')
    return out

def reduce(self, func, dim=None, axis=None, keep_attrs=None,
shortcut=True, **kwargs):
"""Reduce the items in this group by applying `func` along some
Expand Down
45 changes: 45 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ def func(arg1, arg2, arg3=0):
assert_identical(expected, actual)


@pytest.mark.xfail
def test_da_groupby_single_value_per_dim():
    """Reduce over the grouped dimension when every group holds one value.

    Marked ``xfail``: the reduction at the end of this test is expected to
    raise.
    """
    array = xr.DataArray([[1, 1, 1], [2, 2, 2]],
                         [('x', [1, 2]), ('y', [0, 1, 2])])

    # This raises an error.
    # Suspected cause (unconfirmed): gr._group_indices is [0, 1]
    # instead of [[0,], [1,]]
    array.groupby('x').mean(dim='x')


def test_ds_groupby_apply_func_args():

def func(arg1, arg2, arg3=0):
Expand All @@ -105,4 +117,37 @@ def func(arg1, arg2, arg3=0):
assert_identical(expected, actual)


def test_da_groupby_quantile():
    """Exercise ``groupby(...).quantile`` on DataArrays.

    Covers a scalar ``q``, a sequence of ``q`` values, and a
    two-dimensional array grouped along each of its dimensions in turn.
    """
    flat = xr.DataArray([1, 2, 3, 4, 5, 6],
                        [('x', [1, 1, 1, 2, 2, 2])])

    # Scalar quantile: one value per group, no 'quantile' coordinate.
    want = xr.DataArray([2, 5], [('x', [1, 2])])
    got = flat.groupby('x').quantile(.5)
    assert_identical(want, got)

    # Vector quantile: a 'quantile' dimension holds the requested values.
    want = xr.DataArray([[1, 3], [4, 6]],
                        [('x', [1, 2]), ('quantile', [0, 1])])
    got = flat.groupby('x').quantile([0, 1])
    assert_identical(want, got)

    # Multiple dimensions, with repeated labels along both 'x' and 'y'.
    grid = xr.DataArray([[1, 11, 21], [2, 12, 22], [3, 13, 23],
                         [4, 16, 24], [5, 15, 25]],
                        [('x', [1, 1, 1, 2, 2],),
                         ('y', [0, 0, 1])])

    # Grouping and reducing along 'x' leaves 'y' untouched.
    want = xr.DataArray([[1, 11, 21], [4, 15, 24]],
                        [('x', [1, 2]), ('y', [0, 0, 1])])
    got = grid.groupby('x').quantile(0, dim='x')
    assert_identical(want, got)

    # Grouping and reducing along 'y' leaves 'x' untouched.
    want = xr.DataArray([[1, 21], [2, 22], [3, 23], [4, 24], [5, 25]],
                        [('x', [1, 1, 1, 2, 2]), ('y', [0, 1])])
    got = grid.groupby('y').quantile(0, dim='y')
    assert_identical(want, got)


# TODO: move other groupby tests from test_dataset and test_dataarray over here