diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0e48dfcfc78..4aad6ad3701 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,8 +29,8 @@ Breaking changes - scipy 1.3 - Remove ``compat`` and ``encoding`` kwargs from ``DataArray``, which - have been deprecated since 0.12. (:pull:`3650`). - Instead, specify the encoding when writing to disk or set + have been deprecated since 0.12. (:pull:`3650`). + Instead, specify the encoding when writing to disk or set the ``encoding`` attribute directly. By `Maximilian Roos `_ - :py:func:`xarray.dot`, :py:meth:`DataArray.dot`, and the ``@`` operator now @@ -67,10 +67,15 @@ New Features - :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` now allow swapping to dimension names that don't exist yet. (:pull:`3636`) By `Justus Magin `_. -- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties - and support `.dt` accessor for timedelta +- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties + and support `.dt` accessor for timedelta via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) By `Anderson Banihirwe `_. +- Support CFTimeIndex in :py:meth:`DataArray.interpolate_na`, define 1970-01-01 + as the default offset for the interpolation index for both DatetimeIndex and + CFTimeIndex, use microseconds in the conversion from timedelta objects + to floats to avoid overflow errors (:issue:`3641`, :pull:`3631`). + By David Huard ``_. Bug fixes ~~~~~~~~~ diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 4005d4fbf6d..8b440812ca9 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -430,7 +430,14 @@ def __sub__(self, other): import cftime if isinstance(other, (CFTimeIndex, cftime.datetime)): - return pd.TimedeltaIndex(np.array(self) - np.array(other)) + try: + return pd.TimedeltaIndex(np.array(self) - np.array(other)) + except OverflowError: + raise ValueError( + "The time difference exceeds the range of values " + "that can be expressed at the nanosecond resolution." + ) + elif isinstance(other, pd.TimedeltaIndex): return CFTimeIndex(np.array(self) - other.to_pytimedelta()) else: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4fc6773c459..a658f125054 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -18,6 +18,7 @@ cast, ) +import datetime import numpy as np import pandas as pd @@ -2041,7 +2042,9 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + max_gap: Union[ + int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta + ] = None, **kwargs: Any, ) -> "DataArray": """Fill in NaNs by interpolating according to different methods. @@ -2073,7 +2076,7 @@ def interpolate_na( or None for no limit. This filling is done regardless of the size of the gap in the data. To only interpolate over gaps less than a given length, see ``max_gap``. - max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default None. Maximum size of gap, a continuous sequence of NaNs, that will be filled. Use None for no limit. When interpolating along a datetime64 dimension and ``use_coordinate=True``, ``max_gap`` can be one of the following: @@ -2081,6 +2084,7 @@ def interpolate_na( - a string that is valid input for pandas.to_timedelta - a :py:class:`numpy.timedelta64` object - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled dimensions has not been implemented yet. Gap length is defined as the difference diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6f1ea76be4c..c72ed6cc7d6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -27,6 +27,7 @@ cast, ) +import datetime import numpy as np import pandas as pd @@ -3995,7 +3996,9 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + max_gap: Union[ + int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta + ] = None, **kwargs: Any, ) -> "Dataset": """Fill in NaNs by interpolating according to different methods. @@ -4028,7 +4031,7 @@ def interpolate_na( or None for no limit. This filling is done regardless of the size of the gap in the data. To only interpolate over gaps less than a given length, see ``max_gap``. - max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default None. Maximum size of gap, a continuous sequence of NaNs, that will be filled. Use None for no limit. When interpolating along a datetime64 dimension and ``use_coordinate=True``, ``max_gap`` can be one of the following: @@ -4036,6 +4039,7 @@ def interpolate_na( - a string that is valid input for pandas.to_timedelta - a :py:class:`numpy.timedelta64` object - a :py:class:`pandas.Timedelta` object + - a :py:class:`datetime.timedelta` object Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled dimensions has not been implemented yet. Gap length is defined as the difference diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 3fa97f0b448..c2fe604a9d3 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -372,44 +372,100 @@ def _datetime_nanmin(array): def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): - """Convert an array containing datetime-like data to an array of floats. + """Convert an array containing datetime-like data to numerical values. + + Convert the datetime array to a timedelta relative to an offset. Parameters ---------- - da : np.array - Input data - offset: Scalar with the same type of array or None - If None, subtract minimum values to reduce round off error - datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', - 'us', 'ns', 'ps', 'fs', 'as'} - dtype: target dtype + da : array-like + Input data + offset: None, datetime or cftime.datetime + Datetime offset. If None, this is set by default to the array's minimum + value to reduce round off errors. + datetime_unit: {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} + If not None, convert output to a given datetime unit. Note that some + conversions are not allowed due to non-linear relationships between units. + dtype: dtype + Output dtype. Returns ------- array + Numerical representation of datetime object relative to an offset. + + Notes + ----- + Some datetime unit conversions won't work, for example from days to years, even + though some calendars would allow for them (e.g. no_leap). This is because there + is no `cftime.timedelta` object. """ # TODO: make this function dask-compatible? + # Set offset to minimum if not given if offset is None: if array.dtype.kind in "Mm": offset = _datetime_nanmin(array) else: offset = min(array) + + # Compute timedelta object. + # For np.datetime64, this can silently yield garbage due to overflow. + # One option is to enforce 1970-01-01 as the universal offset. array = array - offset - if not hasattr(array, "dtype"): # scalar is converted to 0d-array + # Scalar is converted to 0d-array + if not hasattr(array, "dtype"): array = np.array(array) + # Convert timedelta objects to float by first converting to microseconds. if array.dtype.kind in "O": - # possibly convert object array containing datetime.timedelta - array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) + return py_timedelta_to_float(array, datetime_unit or "ns").astype(dtype) - if datetime_unit: - array = array / np.timedelta64(1, datetime_unit) + # Convert np.NaT to np.nan + elif array.dtype.kind in "mM": - # convert np.NaT to np.nan - if array.dtype.kind in "mM": + # Convert to specified timedelta units. + if datetime_unit: + array = array / np.timedelta64(1, datetime_unit) return np.where(isnull(array), np.nan, array.astype(dtype)) - return array.astype(dtype) + + +def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): + """Convert a timedelta-like object to numerical values. + + Parameters + ---------- + value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, str + Time delta representation. + datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} + The time units of the output values. Note that some conversions are not allowed due to + non-linear relationships between units. + dtype : type + The output data type. + + """ + import datetime as dt + + if isinstance(value, dt.timedelta): + out = py_timedelta_to_float(value, datetime_unit) + elif isinstance(value, np.timedelta64): + out = np_timedelta64_to_float(value, datetime_unit) + elif isinstance(value, pd.Timedelta): + out = pd_timedelta_to_float(value, datetime_unit) + elif isinstance(value, str): + try: + a = pd.to_timedelta(value) + except ValueError: + raise ValueError( + f"Could not convert {value!r} to timedelta64 using pandas.to_timedelta" + ) + return py_timedelta_to_float(a, datetime_unit) + else: + raise TypeError( + f"Expected value of type str, pandas.Timedelta, datetime.timedelta " + f"or numpy.timedelta64, but received {type(value).__name__}" + ) + return out.astype(dtype) def _to_pytimedelta(array, unit="us"): @@ -417,6 +473,40 @@ def _to_pytimedelta(array, unit="us"): return index.to_pytimedelta().reshape(array.shape) +def np_timedelta64_to_float(array, datetime_unit): + """Convert numpy.timedelta64 to float. + + Notes + ----- + The array is first converted to microseconds, which is less likely to + cause overflow errors. + """ + array = array.astype("timedelta64[ns]").astype(np.float64) + conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit) + return conversion_factor * array + + +def pd_timedelta_to_float(value, datetime_unit): + """Convert pandas.Timedelta to float. + + Notes + ----- + Built on the assumption that pandas timedelta values are in nanoseconds, + which is also the numpy default resolution. + """ + value = value.to_timedelta64() + return np_timedelta64_to_float(value, datetime_unit) + + +def py_timedelta_to_float(array, datetime_unit): + """Convert a timedelta object to a float, possibly at a loss of resolution. + """ + array = np.asarray(array) + array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) + return conversion_factor * array + + def mean(array, axis=None, skipna=None, **kwargs): """inhouse mean that can handle np.datetime64 or cftime.datetime dtypes""" diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 117fcaf8f81..b20441e993c 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -2,6 +2,7 @@ from functools import partial from numbers import Number from typing import Any, Callable, Dict, Hashable, Sequence, Union +import datetime as dt import numpy as np import pandas as pd @@ -9,7 +10,7 @@ from . import utils from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc -from .duck_array_ops import dask_array_type +from .duck_array_ops import dask_array_type, datetime_to_numeric, timedelta_to_numeric from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -207,52 +208,81 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): - """get index to use for x values in interpolation. + """Return index to use for x values in interpolation or curve fitting. - If use_coordinate is True, the coordinate that shares the name of the - dimension along which interpolation is being performed will be used as the - x values. + Parameters + ---------- + arr : DataArray + Array to interpolate or fit to a curve. + dim : str + Name of dimension along which to fit. + use_coordinate : str or bool + If use_coordinate is True, the coordinate that shares the name of the + dimension along which interpolation is being performed will be used as the + x values. If False, the x values are set as an equally spaced sequence. + + Returns + ------- + Variable + Numerical values for the x-coordinates. - If use_coordinate is False, the x values are set as an equally spaced - sequence. + Notes + ----- + If indexing is along the time dimension, datetime coordinates are converted + to time deltas with respect to 1970-01-01. """ - if use_coordinate: - if use_coordinate is True: - index = arr.get_index(dim) - else: - index = arr.coords[use_coordinate] - if index.ndim != 1: - raise ValueError( - f"Coordinates used for interpolation must be 1D, " - f"{use_coordinate} is {index.ndim}D." - ) - index = index.to_index() - - # TODO: index.name is None for multiindexes - # set name for nice error messages below - if isinstance(index, pd.MultiIndex): - index.name = dim - - if not index.is_monotonic: - raise ValueError(f"Index {index.name!r} must be monotonically increasing") - - if not index.is_unique: - raise ValueError(f"Index {index.name!r} has duplicate values") - - # raise if index cannot be cast to a float (e.g. MultiIndex) - try: - index = index.values.astype(np.float64) - except (TypeError, ValueError): - # pandas raises a TypeError - # xarray/numpy raise a ValueError - raise TypeError( - f"Index {index.name!r} must be castable to float64 to support " - f"interpolation, got {type(index).__name__}." - ) - else: + # Question: If use_coordinate is a string, what role does `dim` play? + from xarray.coding.cftimeindex import CFTimeIndex + + if use_coordinate is False: axis = arr.get_axis_num(dim) - index = np.arange(arr.shape[axis], dtype=np.float64) + return np.arange(arr.shape[axis], dtype=np.float64) + + if use_coordinate is True: + index = arr.get_index(dim) + + else: # string + index = arr.coords[use_coordinate] + if index.ndim != 1: + raise ValueError( + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." + ) + index = index.to_index() + + # TODO: index.name is None for multiindexes + # set name for nice error messages below + if isinstance(index, pd.MultiIndex): + index.name = dim + + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") + + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") + + # Special case for non-standard calendar indexes + # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds + if isinstance(index, (CFTimeIndex, pd.DatetimeIndex)): + offset = type(index[0])(1970, 1, 1) + if isinstance(index, CFTimeIndex): + index = index.values + index = Variable( + data=datetime_to_numeric(index, offset=offset, datetime_unit="ns"), + dims=(dim,), + ) + + # raise if index cannot be cast to a float (e.g. MultiIndex) + try: + index = index.values.astype(np.float64) + except (TypeError, ValueError): + # pandas raises a TypeError + # xarray/numpy raise a ValueError + raise TypeError( + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." + ) return index @@ -263,11 +293,13 @@ def interp_na( use_coordinate: Union[bool, str] = True, method: str = "linear", limit: int = None, - max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None, **kwargs, ): """Interpolate values according to different methods. """ + from xarray.coding.cftimeindex import CFTimeIndex + if dim is None: raise NotImplementedError("dim is a required argument") @@ -281,26 +313,11 @@ def interp_na( if ( dim in self.indexes - and isinstance(self.indexes[dim], pd.DatetimeIndex) + and isinstance(self.indexes[dim], (pd.DatetimeIndex, CFTimeIndex)) and use_coordinate ): - if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): - raise TypeError( - f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" - ) - - if isinstance(max_gap, str): - try: - max_gap = pd.to_timedelta(max_gap) - except ValueError: - raise ValueError( - f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" - ) - - if isinstance(max_gap, pd.Timedelta): - max_gap = np.timedelta64(max_gap.value, "ns") - - max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + # Convert to float + max_gap = timedelta_to_numeric(max_gap) if not use_coordinate: if not isinstance(max_gap, (Number, np.number)): diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index aee7bbd6b11..96c883baa67 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1,6 +1,7 @@ import warnings from textwrap import dedent +import datetime as dt import numpy as np import pandas as pd import pytest @@ -19,6 +20,10 @@ rolling_window, stack, where, + py_timedelta_to_float, + np_timedelta64_to_float, + pd_timedelta_to_float, + timedelta_to_numeric, ) from xarray.core.pycompat import dask_array_type from xarray.testing import assert_allclose, assert_equal @@ -672,13 +677,15 @@ def test_datetime_to_numeric_datetime64(): @requires_cftime def test_datetime_to_numeric_cftime(): - times = cftime_range("2000", periods=5, freq="7D").values - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") + times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h") + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h", dtype=int + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) @@ -686,3 +693,70 @@ def test_datetime_to_numeric_cftime(): result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) + + +@requires_cftime +def test_datetime_to_numeric_potential_overflow(): + import cftime + + times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]") + cftimes = cftime_range( + "2000", periods=5, freq="7D", calendar="proleptic_gregorian" + ).values + + offset = np.datetime64("0001-01-01") + cfoffset = cftime.DatetimeProlepticGregorian(1, 1, 1) + + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="D", dtype=int + ) + cfresult = duck_array_ops.datetime_to_numeric( + cftimes, offset=cfoffset, datetime_unit="D", dtype=int + ) + + expected = 730119 + np.arange(0, 35, 7) + + np.testing.assert_array_equal(result, expected) + np.testing.assert_array_equal(cfresult, expected) + + +def test_py_timedelta_to_float(): + assert py_timedelta_to_float(dt.timedelta(days=1), "ns") == 86400 * 1e9 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ps") == 86400 * 1e18 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ns") == 86400 * 1e15 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "us") == 86400 * 1e12 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "ms") == 86400 * 1e9 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "s") == 86400 * 1e6 + assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 + + +@pytest.mark.parametrize( + "td, expected", + ([np.timedelta64(1, "D"), 86400 * 1e9], [np.timedelta64(1, "ns"), 1.0]), +) +def test_np_timedelta64_to_float(td, expected): + out = np_timedelta64_to_float(td, datetime_unit="ns") + np.testing.assert_allclose(out, expected) + assert isinstance(out, float) + + out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit="ns") + np.testing.assert_allclose(out, expected) + + +@pytest.mark.parametrize( + "td, expected", ([pd.Timedelta(1, "D"), 86400 * 1e9], [pd.Timedelta(1, "ns"), 1.0]) +) +def test_pd_timedelta_to_float(td, expected): + out = pd_timedelta_to_float(td, datetime_unit="ns") + np.testing.assert_allclose(out, expected) + assert isinstance(out, float) + + +@pytest.mark.parametrize( + "td", [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"], +) +def test_timedelta_to_numeric(td): + # Scalar input + out = timedelta_to_numeric(td, "ns") + np.testing.assert_allclose(out, 86400 * 1e9) + assert isinstance(out, float) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index b93325d7eab..e3af8b5873a 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -662,3 +662,10 @@ def test_datetime_interp_noerror(): coords={"time": pd.date_range("01-01-2001", periods=50, freq="H")}, ) a.interp(x=xi, time=xi.time) # should not raise an error + + +@requires_cftime +def test_3641(): + times = xr.cftime_range("0001", periods=3, freq="500Y") + da = xr.DataArray(range(3), dims=["time"], coords=[times]) + da.interp(time=["0002-05-01"]) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 1cd0319a9a5..8d70d9a0fcc 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -16,18 +16,34 @@ from xarray.tests import ( assert_array_equal, assert_equal, + assert_allclose, raises_regex, requires_bottleneck, requires_dask, requires_scipy, + requires_cftime, ) +from xarray.tests.test_cftime_offsets import _CFTIME_CALENDARS + @pytest.fixture def da(): return xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") +@pytest.fixture +def cf_da(): + def _cf_da(calendar, freq="1D"): + times = xr.cftime_range( + start="1970-01-01", freq=freq, periods=10, calendar=calendar + ) + values = np.arange(10) + return xr.DataArray(values, dims=("time",), coords={"time": times}) + + return _cf_da + + @pytest.fixture def ds(): ds = xr.Dataset() @@ -472,6 +488,42 @@ def test_interpolate_na_nan_block_lengths(y, lengths): assert_equal(actual, expected) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_get_clean_interp_index_cf_calendar(cf_da, calendar): + """The index for CFTimeIndex is in units of days. This means that if two series using a 360 and 365 days + calendar each have a trend of .01C/year, the linear regression coefficients will be different because they + have different number of days. + + Another option would be to have an index in units of years, but this would likely create other difficulties. + """ + i = get_clean_interp_index(cf_da(calendar), dim="time") + np.testing.assert_array_equal(i, np.arange(10) * 1e9 * 86400) + + +@requires_cftime +@pytest.mark.parametrize( + ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"]) +) +def test_get_clean_interp_index_dt(cf_da, calendar, freq): + """In the gregorian case, the index should be proportional to normal datetimes.""" + g = cf_da(calendar, freq=freq) + g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",)) + + gi = get_clean_interp_index(g, "time") + si = get_clean_interp_index(g, "time", use_coordinate="stime") + np.testing.assert_array_equal(gi, si) + + +def test_get_clean_interp_index_potential_overflow(): + da = xr.DataArray( + [0, 1, 2], + dims=("time",), + coords={"time": xr.cftime_range("0000-01-01", periods=3, calendar="360_day")}, + ) + get_clean_interp_index(da, "time") + + @pytest.fixture def da_time(): return xr.DataArray( @@ -490,7 +542,7 @@ def test_interpolate_na_max_gap_errors(da_time): da_time.interpolate_na("t", max_gap=(1,)) da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) - with raises_regex(TypeError, "Underlying index is"): + with raises_regex(TypeError, "Expected value of type str"): da_time.interpolate_na("t", max_gap=1) with raises_regex(TypeError, "Expected integer or floating point"): @@ -501,10 +553,7 @@ def test_interpolate_na_max_gap_errors(da_time): @requires_bottleneck -@pytest.mark.parametrize( - "time_range_func", - [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], -) +@pytest.mark.parametrize("time_range_func", [pd.date_range, xr.cftime_range]) @pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) @pytest.mark.parametrize( "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] @@ -517,7 +566,7 @@ def test_interpolate_na_max_gap_time_specifier( da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) ) actual = transform(da_time).interpolate_na("t", max_gap=max_gap) - assert_equal(actual, expected) + assert_allclose(actual, expected) @requires_bottleneck