Skip to content

Commit

Permalink
quantile: use skipna=None (#6303)
Browse files Browse the repository at this point in the history
* quantile: use skipna=None

* better term

* move whats new entry

* remove duplicated entry
  • Loading branch information
mathause authored Mar 3, 2022
1 parent e2b40d7 commit a01460b
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 18 deletions.
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ Deprecations
Bug fixes
~~~~~~~~~

- Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and
ensure it skips missing values for float dtypes (consistent with other methods). This should
not change the behavior (:pull:`6303`). By `Mathias Hauser <https://github.com/mathause>`_.

Documentation
~~~~~~~~~~~~~
Expand Down Expand Up @@ -86,7 +89,6 @@ Deprecations
Bug fixes
~~~~~~~~~


- Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size
can now be stored using `to_zarr()` (:pull:`6258`) By `Tobias Kölling <https://github.com/d70-t>`_.
- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann <https://github.com/antarcticrainforest>`_ and `Stan West <https://github.com/stanwest>`_.
Expand Down
7 changes: 5 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3440,7 +3440,7 @@ def quantile(
dim: str | Sequence[Hashable] | None = None,
method: QUANTILE_METHODS = "linear",
keep_attrs: bool = None,
skipna: bool = True,
skipna: bool = None,
interpolation: QUANTILE_METHODS = None,
) -> DataArray:
"""Compute the qth quantile of the data along the specified dimension.
Expand Down Expand Up @@ -3486,7 +3486,10 @@ def quantile(
the original object to the new one. If False (default), the new
object will be returned without attributes.
skipna : bool, optional
Whether to skip missing values when aggregating.
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down
7 changes: 5 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6160,7 +6160,7 @@ def quantile(
method: QUANTILE_METHODS = "linear",
numeric_only: bool = False,
keep_attrs: bool = None,
skipna: bool = True,
skipna: bool = None,
interpolation: QUANTILE_METHODS = None,
):
"""Compute the qth quantile of the data along the specified dimension.
Expand Down Expand Up @@ -6209,7 +6209,10 @@ def quantile(
numeric_only : bool, optional
If True, only apply ``func`` to variables with a numeric dtype.
skipna : bool, optional
Whether to skip missing values when aggregating.
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down
7 changes: 5 additions & 2 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ def quantile(
dim=None,
method="linear",
keep_attrs=None,
skipna=True,
skipna=None,
interpolation=None,
):
"""Compute the qth quantile over each array in the groups and
Expand Down Expand Up @@ -597,7 +597,10 @@ def quantile(
version 1.22.0.
skipna : bool, optional
Whether to skip missing values when aggregating.
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down
12 changes: 10 additions & 2 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1978,7 +1978,7 @@ def quantile(
dim: str | Sequence[Hashable] | None = None,
method: QUANTILE_METHODS = "linear",
keep_attrs: bool = None,
skipna: bool = True,
skipna: bool = None,
interpolation: QUANTILE_METHODS = None,
) -> Variable:
"""Compute the qth quantile of the data along the specified dimension.
Expand Down Expand Up @@ -2024,6 +2024,11 @@ def quantile(
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
skipna : bool, optional
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
Returns
-------
Expand Down Expand Up @@ -2059,7 +2064,10 @@ def quantile(

method = interpolation

_quantile_func = np.nanquantile if skipna else np.quantile
if skipna or (skipna is None and self.dtype.kind in "cfO"):
_quantile_func = np.nanquantile
else:
_quantile_func = np.quantile

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
Expand Down
12 changes: 8 additions & 4 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2516,15 +2516,19 @@ def test_reduce_out(self):
with pytest.raises(TypeError):
orig.mean(out=np.ones(orig.shape))

@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
@pytest.mark.parametrize(
"axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
)
def test_quantile(self, q, axis, dim, skipna) -> None:
actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna)
_percentile_func = np.nanpercentile if skipna else np.percentile
expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis)

va = self.va.copy(deep=True)
va[0, 0] = np.NaN

actual = DataArray(va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna)
_percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile
expected = _percentile_func(va.values, np.array(q) * 100, axis=axis)
np.testing.assert_allclose(actual.values, expected)
if is_scalar(q):
assert "quantile" not in actual.dims
Expand Down
3 changes: 2 additions & 1 deletion xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4718,10 +4718,11 @@ def test_reduce_keepdims(self):
)
assert_identical(expected, actual)

@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
def test_quantile(self, q, skipna) -> None:
ds = create_test_data(seed=123)
ds.var1.data[0, 0] = np.NaN

for dim in [None, "dim1", ["dim1"]]:
ds_quantile = ds.quantile(q, dim=dim, skipna=skipna)
Expand Down
25 changes: 25 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,17 @@ def test_da_groupby_quantile() -> None:
actual = array.groupby("x").quantile([0, 1])
assert_identical(expected, actual)

array = xr.DataArray(
data=[np.NaN, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x"
)

for skipna in (True, False, None):
e = [np.NaN, 5] if skipna is False else [2.5, 5]

expected = xr.DataArray(data=e, coords={"x": [1, 2], "quantile": 0.5}, dims="x")
actual = array.groupby("x").quantile(0.5, skipna=skipna)
assert_identical(expected, actual)

# Multiple dimensions
array = xr.DataArray(
data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]],
Expand Down Expand Up @@ -306,6 +317,20 @@ def test_ds_groupby_quantile() -> None:
actual = ds.groupby("x").quantile([0, 1])
assert_identical(expected, actual)

ds = xr.Dataset(
data_vars={"a": ("x", [np.NaN, 2, 3, 4, 5, 6])},
coords={"x": [1, 1, 1, 2, 2, 2]},
)

for skipna in (True, False, None):
e = [np.NaN, 5] if skipna is False else [2.5, 5]

expected = xr.Dataset(
data_vars={"a": ("x", e)}, coords={"quantile": 0.5, "x": [1, 2]}
)
actual = ds.groupby("x").quantile(0.5, skipna=skipna)
assert_identical(expected, actual)

# Multiple dimensions
ds = xr.Dataset(
data_vars={
Expand Down
12 changes: 8 additions & 4 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1700,16 +1700,20 @@ def raise_if_called(*args, **kwargs):
with set_options(use_bottleneck=False):
v.min()

@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
@pytest.mark.parametrize(
"axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
)
def test_quantile(self, q, axis, dim, skipna):
v = Variable(["x", "y"], self.d)

d = self.d.copy()
d[0, 0] = np.NaN

v = Variable(["x", "y"], d)
actual = v.quantile(q, dim=dim, skipna=skipna)
_percentile_func = np.nanpercentile if skipna else np.percentile
expected = _percentile_func(self.d, np.array(q) * 100, axis=axis)
_percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile
expected = _percentile_func(d, np.array(q) * 100, axis=axis)
np.testing.assert_allclose(actual.values, expected)

@requires_dask
Expand Down

0 comments on commit a01460b

Please sign in to comment.