Numpy 1.18 support #3537

Merged: 7 commits, Nov 19, 2019
Changes from 6 commits
ci/azure/install.yml (2 changes: 1 addition & 1 deletion)
@@ -16,9 +16,9 @@ steps:
         --pre \
         --upgrade \
         matplotlib \
+        numpy \
         pandas \
         scipy
-        # numpy \ # FIXME https://github.com/pydata/xarray/issues/3409
     pip install \
         --no-deps \
         --upgrade \
ci/requirements/py36.yml (2 changes: 1 addition & 1 deletion)
@@ -25,7 +25,7 @@ dependencies:
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
+  - numpy
   - pandas
   - pint
   - pip
ci/requirements/py37.yml (2 changes: 1 addition & 1 deletion)
@@ -25,7 +25,7 @@ dependencies:
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
+  - numpy
   - pandas
   - pint
   - pip
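
Together with the install.yml change above, these requirement-file updates drop the numpy<1.18 pin from the test environments. As a quick, hypothetical sanity check (not part of the PR), a freshly solved environment can be verified to pick up the newly allowed numpy:

import numpy as np

# Hypothetical check that the unpinned environment resolves to numpy >= 1.18,
# the version range these CI changes re-enable.
major, minor = (int(part) for part in np.__version__.split(".")[:2])
assert (major, minor) >= (1, 18), np.__version__
print("numpy", np.__version__)
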
doc/whats-new.rst (7 changes: 6 additions & 1 deletion)
@@ -115,6 +115,12 @@ Bug fixes
   (:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_
 - Allow appending datetime and bool data variables to zarr stores.
   (:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.
+- Add support for numpy >=1.18; fix mean() on datetime64 arrays on the dask backend
+  (:issue:`3409`, :pull:`3537`). By `Guido Imperiale <https://github.com/crusaderky>`_.
+- Add support for pandas >=0.26 (:issue:`3440`).
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- Add support for pseudonetcdf >=3.1 (:pull:`3485`).
+  By `Barron Henderson <https://github.com/barronh>`_.
 
 Documentation
 ~~~~~~~~~~~~~
@@ -133,7 +139,6 @@ Documentation
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
 
 - Added integration tests against `pint <https://pint.readthedocs.io/>`_.
   (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
   by `Justus Magin <https://github.com/keewis>`_.
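
For illustration, a minimal sketch of the user-facing behaviour described by the numpy >=1.18 entry (assumes this patch is installed; the last line additionally assumes dask is available):

import numpy as np
import xarray as xr

# Same data as the new test further down: two valid timestamps plus NaT entries.
da = xr.DataArray(
    np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="datetime64[ns]"),
    dims="time",
)

print(da.mean())              # 2010-01-02: NaT is skipped by default
print(da.mean(skipna=False))  # NaT

# With dask, the same reduction works on chunked data, even when an entire
# chunk contains only NaT:
print(da.chunk({"time": 3}).mean().compute())
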
xarray/core/duck_array_ops.py (34 changes: 32 additions & 2 deletions)
@@ -351,6 +351,32 @@ def f(values, axis=None, skipna=None, **kwargs):
 _mean = _create_nan_agg_method("mean")
 
 
+def _datetime_nanmin(array):
+    """nanmin() function for datetime64.
+
+    Caveats that this function deals with:
+
+    - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT
+    - numpy nanmin() doesn't work on datetime64 (all versions at the moment of writing)
+    - dask min() does not work on datetime64 (all versions at the moment of writing)
+    """
+    from .dataarray import DataArray
+    from .variable import Variable
+
+    if isinstance(array, (DataArray, Variable)):
+        array = array.data
+
+    assert array.dtype.kind in "mM"
+    dtype = array.dtype
+    # (NaT).astype(float) does not produce NaN...
+    array = where(pandas_isnull(array), np.nan, array.astype(float))
+    array = min(array, skipna=True)
+    if isinstance(array, float):
+        array = np.array(array)
+    # ...but (NaN).astype("M8") does produce NaT
+    return array.astype(dtype)
+
+
 def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     """Convert an array containing datetime-like data to an array of floats.
 
@@ -370,7 +396,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     """
     # TODO: make this function dask-compatible?
     if offset is None:
-        offset = array.min()
+        if array.dtype.kind in "Mm":
+            offset = _datetime_nanmin(array)
+        else:
+            offset = min(array)
     array = array - offset
 
     if not hasattr(array, "dtype"): # scalar is converted to 0d-array
@@ -401,7 +430,8 @@ def mean(array, axis=None, skipna=None, **kwargs):
 
     array = asarray(array)
     if array.dtype.kind in "Mm":
-        offset = min(array)
+        offset = _datetime_nanmin(array)
+
         # xarray always uses np.datetime64[ns] for np.datetime64 data
         dtype = "timedelta64[ns]"
         return (
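
The float round-trip in _datetime_nanmin is the crux: NaT cast to float does not become NaN, but NaN cast back to datetime64 does become NaT. A standalone sketch of the same approach in plain numpy/pandas (nanmin_datetime64_sketch is a hypothetical name used only for this illustration):

import numpy as np
import pandas as pd

def nanmin_datetime64_sketch(arr):
    # Mirror the approach of _datetime_nanmin: mask NaT as NaN, reduce in float
    # space, then cast back and rely on NaN -> NaT for missing results.
    assert arr.dtype.kind in "mM"
    dtype = arr.dtype
    # NaT does not turn into NaN under astype(float), so mask it explicitly.
    as_float = np.where(pd.isnull(arr), np.nan, arr.astype(float))
    if np.isnan(as_float).all():
        return np.array(np.nan).astype(dtype)  # all-NaT input -> NaT
    return np.array(np.nanmin(as_float)).astype(dtype)

arr = np.array(["2010-01-03", "NaT", "2010-01-01"], dtype="datetime64[ns]")
print(nanmin_datetime64_sketch(arr))  # 2010-01-01
print(nanmin_datetime64_sketch(np.array(["NaT"], dtype="datetime64[ns]")))  # NaT

The version in the diff above instead reduces through xarray's own min() wrapper with skipna=True, so the same code path also covers dask-backed arrays.
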
xarray/tests/test_duck_array_ops.py (50 changes: 33 additions & 17 deletions)
@@ -274,23 +274,39 @@ def assert_dask_array(da, dask):
 
 
 @arm_xfail
-@pytest.mark.parametrize("dask", [False, True])
-def test_datetime_reduce(dask):
-    time = np.array(pd.date_range("15/12/1999", periods=11))
-    time[8:11] = np.nan
-    da = DataArray(np.linspace(0, 365, num=11), dims="time", coords={"time": time})
-
-    if dask and has_dask:
-        chunks = {"time": 5}
-        da = da.chunk(chunks)
-
-    actual = da["time"].mean()
-    assert not pd.isnull(actual)
-    actual = da["time"].mean(skipna=False)
-    assert pd.isnull(actual)
-
-    # test for a 0d array
-    assert da["time"][0].mean() == da["time"][:1].mean()
+@pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
+def test_datetime_mean(dask):
+    # Note: only testing numpy, as dask is broken upstream
+    da = DataArray(
+        np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8"),
+        dims=["time"],
+    )
+    if dask:
+        # Trigger use case where a chunk is full of NaT
+        da = da.chunk({"time": 3})
+
+    expect = DataArray(np.array("2010-01-02", dtype="M8"))
+    expect_nat = DataArray(np.array("NaT", dtype="M8"))
+
+    actual = da.mean()
+    if dask:
+        assert actual.chunks is not None
+    assert_equal(actual, expect)
+
+    actual = da.mean(skipna=False)
+    if dask:
+        assert actual.chunks is not None
+    assert_equal(actual, expect_nat)
+
+    # tests for 1d array full of NaT
+    assert_equal(da[[1]].mean(), expect_nat)
+    assert_equal(da[[1]].mean(skipna=False), expect_nat)
+
+    # tests for a 0d array
+    assert_equal(da[0].mean(), da[0])
+    assert_equal(da[0].mean(skipna=False), da[0])
+    assert_equal(da[1].mean(), expect_nat)
+    assert_equal(da[1].mean(skipna=False), expect_nat)
 
 
 @requires_cftime
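
A hypothetical way to run only the new test from a development checkout (requires pytest; installing dask as well exercises the dask=True parametrization, including the chunk-full-of-NaT case):

import pytest

# Run only test_datetime_mean from the file changed above.
pytest.main(["-v", "-k", "test_datetime_mean", "xarray/tests/test_duck_array_ops.py"])
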