Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

quantile: use skipna=None #6303

Merged
merged 5 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ Deprecations
Bug fixes
~~~~~~~~~

- Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and
ensure it skips missing values for float dtypes (consistent with other methods). This should
not change the behavior (:pull:`6303`). By `Mathias Hauser <https://github.com/mathause>`_.

Documentation
~~~~~~~~~~~~~
Expand Down Expand Up @@ -86,7 +89,6 @@ Deprecations
Bug fixes
~~~~~~~~~


- Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size
can now be stored using `to_zarr()` (:pull:`6258`). By `Tobias Kölling <https://github.com/d70-t>`_.
- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann <https://github.com/antarcticrainforest>`_ and `Stan West <https://github.com/stanwest>`_.
Expand Down
7 changes: 5 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3440,7 +3440,7 @@ def quantile(
dim: str | Sequence[Hashable] | None = None,
method: QUANTILE_METHODS = "linear",
keep_attrs: bool = None,
skipna: bool = True,
skipna: bool = None,
interpolation: QUANTILE_METHODS = None,
) -> DataArray:
"""Compute the qth quantile of the data along the specified dimension.
Expand Down Expand Up @@ -3486,7 +3486,10 @@ def quantile(
the original object to the new one. If False (default), the new
object will be returned without attributes.
skipna : bool, optional
Whether to skip missing values when aggregating.
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down
7 changes: 5 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6160,7 +6160,7 @@ def quantile(
method: QUANTILE_METHODS = "linear",
numeric_only: bool = False,
keep_attrs: bool = None,
skipna: bool = True,
skipna: bool = None,
interpolation: QUANTILE_METHODS = None,
):
"""Compute the qth quantile of the data along the specified dimension.
Expand Down Expand Up @@ -6209,7 +6209,10 @@ def quantile(
numeric_only : bool, optional
If True, only apply ``func`` to variables with a numeric dtype.
skipna : bool, optional
Whether to skip missing values when aggregating.
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down
7 changes: 5 additions & 2 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ def quantile(
dim=None,
method="linear",
keep_attrs=None,
skipna=True,
skipna=None,
interpolation=None,
):
"""Compute the qth quantile over each array in the groups and
Expand Down Expand Up @@ -597,7 +597,10 @@ def quantile(
version 1.22.0.
skipna : bool, optional
Whether to skip missing values when aggregating.
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down
12 changes: 10 additions & 2 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1978,7 +1978,7 @@ def quantile(
dim: str | Sequence[Hashable] | None = None,
method: QUANTILE_METHODS = "linear",
keep_attrs: bool = None,
skipna: bool = True,
skipna: bool = None,
interpolation: QUANTILE_METHODS = None,
) -> Variable:
"""Compute the qth quantile of the data along the specified dimension.
Expand Down Expand Up @@ -2024,6 +2024,11 @@ def quantile(
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
skipna : bool, optional
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down Expand Up @@ -2059,7 +2064,10 @@ def quantile(

method = interpolation

_quantile_func = np.nanquantile if skipna else np.quantile
if skipna or (skipna is None and self.dtype.kind in "cfO"):
_quantile_func = np.nanquantile
else:
_quantile_func = np.quantile

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
Expand Down
12 changes: 8 additions & 4 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2516,15 +2516,19 @@ def test_reduce_out(self):
with pytest.raises(TypeError):
orig.mean(out=np.ones(orig.shape))

@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
@pytest.mark.parametrize(
"axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
)
def test_quantile(self, q, axis, dim, skipna) -> None:
actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna)
_percentile_func = np.nanpercentile if skipna else np.percentile
expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis)

va = self.va.copy(deep=True)
va[0, 0] = np.NaN

actual = DataArray(va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna)
_percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile
expected = _percentile_func(va.values, np.array(q) * 100, axis=axis)
np.testing.assert_allclose(actual.values, expected)
if is_scalar(q):
assert "quantile" not in actual.dims
Expand Down
3 changes: 2 additions & 1 deletion xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4718,10 +4718,11 @@ def test_reduce_keepdims(self):
)
assert_identical(expected, actual)

@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
def test_quantile(self, q, skipna) -> None:
ds = create_test_data(seed=123)
ds.var1.data[0, 0] = np.NaN

for dim in [None, "dim1", ["dim1"]]:
ds_quantile = ds.quantile(q, dim=dim, skipna=skipna)
Expand Down
25 changes: 25 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,17 @@ def test_da_groupby_quantile() -> None:
actual = array.groupby("x").quantile([0, 1])
assert_identical(expected, actual)

array = xr.DataArray(
data=[np.NaN, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x"
)

for skipna in (True, False, None):
e = [np.NaN, 5] if skipna is False else [2.5, 5]

expected = xr.DataArray(data=e, coords={"x": [1, 2], "quantile": 0.5}, dims="x")
actual = array.groupby("x").quantile(0.5, skipna=skipna)
assert_identical(expected, actual)

# Multiple dimensions
array = xr.DataArray(
data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]],
Expand Down Expand Up @@ -306,6 +317,20 @@ def test_ds_groupby_quantile() -> None:
actual = ds.groupby("x").quantile([0, 1])
assert_identical(expected, actual)

ds = xr.Dataset(
data_vars={"a": ("x", [np.NaN, 2, 3, 4, 5, 6])},
coords={"x": [1, 1, 1, 2, 2, 2]},
)

for skipna in (True, False, None):
e = [np.NaN, 5] if skipna is False else [2.5, 5]

expected = xr.Dataset(
data_vars={"a": ("x", e)}, coords={"quantile": 0.5, "x": [1, 2]}
)
actual = ds.groupby("x").quantile(0.5, skipna=skipna)
assert_identical(expected, actual)

# Multiple dimensions
ds = xr.Dataset(
data_vars={
Expand Down
12 changes: 8 additions & 4 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1700,16 +1700,20 @@ def raise_if_called(*args, **kwargs):
with set_options(use_bottleneck=False):
v.min()

@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
@pytest.mark.parametrize(
"axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
)
def test_quantile(self, q, axis, dim, skipna):
v = Variable(["x", "y"], self.d)

d = self.d.copy()
d[0, 0] = np.NaN

v = Variable(["x", "y"], d)
actual = v.quantile(q, dim=dim, skipna=skipna)
_percentile_func = np.nanpercentile if skipna else np.percentile
expected = _percentile_func(self.d, np.array(q) * 100, axis=axis)
_percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile
expected = _percentile_func(d, np.array(q) * 100, axis=axis)
np.testing.assert_allclose(actual.values, expected)

@requires_dask
Expand Down