diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4497c57e5f2..918106a45df 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -123,6 +123,10 @@ Bug fixes
   By `Spencer Clark <https://github.com/spencerkclark>`_.
 - Avoid use of Dask's deprecated ``get=`` parameter in tests
   by `Matthew Rocklin <https://github.com/mrocklin>`_.
+- An ``OverflowError`` is now accurately raised and caught during the
+  encoding process if a reference date is used that is so distant that
+  the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
+  By `Spencer Clark <https://github.com/spencerkclark>`_.
 
 .. _whats-new.0.10.9:
 
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index dff7e75bdcf..16380976def 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -361,7 +361,12 @@ def encode_cf_datetime(dates, units=None, calendar=None):
         delta_units = _netcdf_to_numpy_timeunit(delta)
         time_delta = np.timedelta64(1, delta_units).astype('timedelta64[ns]')
         ref_date = np.datetime64(pd.Timestamp(ref_date))
-        num = (dates - ref_date) / time_delta
+
+        # Wrap the dates in a DatetimeIndex to do the subtraction to ensure
+        # an OverflowError is raised if the ref_date is too far away from
+        # dates to be encoded (GH 2272).
+        num = (pd.DatetimeIndex(dates.ravel()) - ref_date) / time_delta
+        num = num.values.reshape(dates.shape)
 
     except (OutOfBoundsDatetime, OverflowError):
         num = _encode_datetime_with_cftime(dates, units, calendar)
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 10a1a956b27..8e47bd37eac 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -8,7 +8,8 @@
 import pytest
 
 from xarray import DataArray, Variable, coding, decode_cf, set_options
-from xarray.coding.times import _import_cftime
+from xarray.coding.times import (_import_cftime, decode_cf_datetime,
+                                 encode_cf_datetime)
 from xarray.coding.variables import SerializationWarning
 from xarray.core.common import contains_cftime_datetimes
 
@@ -763,3 +764,16 @@ def test_contains_cftime_datetimes_non_cftimes(non_cftime_data):
 @pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])])
 def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data):
     assert not contains_cftime_datetimes(non_cftime_data.chunk())
+
+
+@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
+@pytest.mark.parametrize('shape', [(24,), (8, 3), (2, 4, 3)])
+def test_encode_datetime_overflow(shape):
+    # Regression test for GH 2272: ref date is 300 years before the dates.
+    dates = pd.date_range('2100', periods=24).values.reshape(shape)
+    units = 'days since 1800-01-01'
+    calendar = 'standard'
+
+    num, _, _ = encode_cf_datetime(dates, units, calendar)
+    roundtrip = decode_cf_datetime(num, units, calendar)
+    np.testing.assert_array_equal(dates, roundtrip)