diff --git a/doc/api.rst b/doc/api.rst index d3491e020fd..9735eb0c708 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -616,6 +616,7 @@ Accessors :toctree: generated/ core.accessor_dt.DatetimeAccessor + core.accessor_dt.TimedeltaAccessor core.accessor_str.StringAccessor Custom Indexes diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d03cfb948fa..fe05a4d2c21 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,6 +31,10 @@ New Features - Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen` and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ +- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties + and support `.dt` accessor for timedelta + via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) + By `Anderson Banihirwe `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index aff6fbc6691..c407371f9f0 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -1,7 +1,11 @@ import numpy as np import pandas as pd -from .common import _contains_datetime_like_objects, is_np_datetime_like +from .common import ( + _contains_datetime_like_objects, + is_np_datetime_like, + is_np_timedelta_like, +) from .pycompat import dask_array_type @@ -145,37 +149,8 @@ def _strftime(values, date_format): return access_method(values, date_format) -class DatetimeAccessor: - """Access datetime fields for DataArrays with datetime-like dtypes. - - Similar to pandas, fields can be accessed through the `.dt` attribute - for applicable DataArrays: - - >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', - ... freq='D', periods=100)}) - >>> ds.time.dt - - >>> ds.time.dt.dayofyear[:5] - - array([1, 2, 3, 4, 5], dtype=int32) - Coordinates: - * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... - - All of the pandas fields are accessible here. Note that these fields are - not calendar-aware; if your datetimes are encoded with a non-Gregorian - calendar (e.g. a 360-day calendar) using cftime, then some fields like - `dayofyear` may not be accurate. - - """ - +class Properties: def __init__(self, obj): - if not _contains_datetime_like_objects(obj): - raise TypeError( - "'dt' accessor only available for " - "DataArray with datetime64 timedelta64 dtype or " - "for arrays containing cftime datetime " - "objects." - ) self._obj = obj def _tslib_field_accessor( # type: ignore @@ -194,48 +169,6 @@ def f(self, dtype=dtype): f.__doc__ = docstring return property(f) - year = _tslib_field_accessor("year", "The year of the datetime", np.int64) - month = _tslib_field_accessor( - "month", "The month as January=1, December=12", np.int64 - ) - day = _tslib_field_accessor("day", "The days of the datetime", np.int64) - hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64) - minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64) - second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64) - microsecond = _tslib_field_accessor( - "microsecond", "The microseconds of the datetime", np.int64 - ) - nanosecond = _tslib_field_accessor( - "nanosecond", "The nanoseconds of the datetime", np.int64 - ) - weekofyear = _tslib_field_accessor( - "weekofyear", "The week ordinal of the year", np.int64 - ) - week = weekofyear - dayofweek = _tslib_field_accessor( - "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 - ) - weekday = dayofweek - - weekday_name = _tslib_field_accessor( - "weekday_name", "The name of day in a week (ex: Friday)", object - ) - - dayofyear = _tslib_field_accessor( - "dayofyear", "The ordinal day of the year", np.int64 - ) - quarter = _tslib_field_accessor("quarter", "The quarter of the date") - days_in_month = _tslib_field_accessor( - "days_in_month", "The number of days in the month", np.int64 - ) - daysinmonth = days_in_month - - season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object) - - time = _tslib_field_accessor( - "time", "Timestamps corresponding to datetimes", object - ) - def _tslib_round_accessor(self, name, freq): obj_type = type(self._obj) result = _round_field(self._obj.data, name, freq) @@ -290,6 +223,50 @@ def round(self, freq): """ return self._tslib_round_accessor("round", freq) + +class DatetimeAccessor(Properties): + """Access datetime fields for DataArrays with datetime-like dtypes. + + Fields can be accessed through the `.dt` attribute + for applicable DataArrays. + + Notes + ------ + Note that these fields are not calendar-aware; if your datetimes are encoded + with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime, + then some fields like `dayofyear` may not be accurate. + + Examples + --------- + >>> import xarray as xr + >>> import pandas as pd + >>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10) + >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts + + array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000', + '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000', + '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000', + '2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000', + '2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'], + dtype='datetime64[ns]') + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + >>> ts.dt + + >>> ts.dt.dayofyear + + array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + >>> ts.dt.quarter + + array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + + """ + def strftime(self, date_format): ''' Return an array of formatted strings specified by date_format, which @@ -323,3 +300,163 @@ def strftime(self, date_format): return obj_type( result, name="strftime", coords=self._obj.coords, dims=self._obj.dims ) + + year = Properties._tslib_field_accessor( + "year", "The year of the datetime", np.int64 + ) + month = Properties._tslib_field_accessor( + "month", "The month as January=1, December=12", np.int64 + ) + day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64) + hour = Properties._tslib_field_accessor( + "hour", "The hours of the datetime", np.int64 + ) + minute = Properties._tslib_field_accessor( + "minute", "The minutes of the datetime", np.int64 + ) + second = Properties._tslib_field_accessor( + "second", "The seconds of the datetime", np.int64 + ) + microsecond = Properties._tslib_field_accessor( + "microsecond", "The microseconds of the datetime", np.int64 + ) + nanosecond = Properties._tslib_field_accessor( + "nanosecond", "The nanoseconds of the datetime", np.int64 + ) + weekofyear = Properties._tslib_field_accessor( + "weekofyear", "The week ordinal of the year", np.int64 + ) + week = weekofyear + dayofweek = Properties._tslib_field_accessor( + "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 + ) + weekday = dayofweek + + weekday_name = Properties._tslib_field_accessor( + "weekday_name", "The name of day in a week", object + ) + + dayofyear = Properties._tslib_field_accessor( + "dayofyear", "The ordinal day of the year", np.int64 + ) + quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date") + days_in_month = Properties._tslib_field_accessor( + "days_in_month", "The number of days in the month", np.int64 + ) + daysinmonth = days_in_month + + season = Properties._tslib_field_accessor("season", "Season of the year", object) + + time = Properties._tslib_field_accessor( + "time", "Timestamps corresponding to datetimes", object + ) + + is_month_start = Properties._tslib_field_accessor( + "is_month_start", + "Indicates whether the date is the first day of the month.", + bool, + ) + is_month_end = Properties._tslib_field_accessor( + "is_month_end", "Indicates whether the date is the last day of the month.", bool + ) + is_quarter_start = Properties._tslib_field_accessor( + "is_quarter_start", + "Indicator for whether the date is the first day of a quarter.", + bool, + ) + is_quarter_end = Properties._tslib_field_accessor( + "is_quarter_end", + "Indicator for whether the date is the last day of a quarter.", + bool, + ) + is_year_start = Properties._tslib_field_accessor( + "is_year_start", "Indicate whether the date is the first day of a year.", bool + ) + is_year_end = Properties._tslib_field_accessor( + "is_year_end", "Indicate whether the date is the last day of the year.", bool + ) + is_leap_year = Properties._tslib_field_accessor( + "is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool + ) + + +class TimedeltaAccessor(Properties): + """Access Timedelta fields for DataArrays with Timedelta-like dtypes. + + Fields can be accessed through the `.dt` attribute for applicable DataArrays. + + Examples + -------- + >>> import pandas as pd + >>> import xarray as xr + >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20) + >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts + + array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000, + 172800000000000, 194400000000000, 216000000000000, 237600000000000, + 259200000000000, 280800000000000, 302400000000000, 324000000000000, + 345600000000000, 367200000000000, 388800000000000, 410400000000000, + 432000000000000, 453600000000000, 475200000000000, 496800000000000], + dtype='timedelta64[ns]') + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + >>> ts.dt + + >>> ts.dt.days + + array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5]) + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + >>> ts.dt.microseconds + + array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + >>> ts.dt.seconds + + array([ 0, 21600, 43200, 64800, 0, 21600, 43200, 64800, 0, + 21600, 43200, 64800, 0, 21600, 43200, 64800, 0, 21600, + 43200, 64800]) + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + """ + + days = Properties._tslib_field_accessor( + "days", "Number of days for each element.", np.int64 + ) + seconds = Properties._tslib_field_accessor( + "seconds", + "Number of seconds (>= 0 and less than 1 day) for each element.", + np.int64, + ) + microseconds = Properties._tslib_field_accessor( + "microseconds", + "Number of microseconds (>= 0 and less than 1 second) for each element.", + np.int64, + ) + nanoseconds = Properties._tslib_field_accessor( + "nanoseconds", + "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.", + np.int64, + ) + + +class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor): + def __new__(cls, obj): + # CombinedDatetimelikeAccessor isn't really instatiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. + if not _contains_datetime_like_objects(obj): + raise TypeError( + "'.dt' accessor only available for " + "DataArray with datetime64 timedelta64 dtype or " + "for arrays containing cftime datetime " + "objects." + ) + + if is_np_timedelta_like(obj.dtype): + return TimedeltaAccessor(obj) + else: + return DatetimeAccessor(obj) diff --git a/xarray/core/common.py b/xarray/core/common.py index a74318b2f90..e908c69dd14 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool: return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64) +def is_np_timedelta_like(dtype: DTypeLike) -> bool: + """Check whether dtype is of the timedelta64 dtype. + """ + return np.issubdtype(dtype, np.timedelta64) + + def _contains_cftime_datetimes(array) -> bool: """Check if an array contains cftime.datetime objects """ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 31cd3c713f6..31aa4da57b2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -33,7 +33,7 @@ rolling, utils, ) -from .accessor_dt import DatetimeAccessor +from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( _broadcast_helper, @@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - dt = property(DatetimeAccessor) + dt = property(CombinedDatetimelikeAccessor) def __init__( self, diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 5fe5b8c3f59..67ca12532c7 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -12,6 +12,8 @@ requires_dask, ) +from .test_dask import raise_if_dask_computes, assert_chunks_equal + class TestDatetimeAccessor: @pytest.fixture(autouse=True) @@ -37,24 +39,38 @@ def setup(self): name="data", ) - def test_field_access(self): - years = xr.DataArray( - self.times.year, name="year", coords=[self.times], dims=["time"] - ) - months = xr.DataArray( - self.times.month, name="month", coords=[self.times], dims=["time"] - ) - days = xr.DataArray( - self.times.day, name="day", coords=[self.times], dims=["time"] - ) - hours = xr.DataArray( - self.times.hour, name="hour", coords=[self.times], dims=["time"] + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] ) - - assert_equal(years, self.data.time.dt.year) - assert_equal(months, self.data.time.dt.month) - assert_equal(days, self.data.time.dt.day) - assert_equal(hours, self.data.time.dt.hour) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) def test_strftime(self): assert ( @@ -69,55 +85,74 @@ def test_not_datetime_type(self): nontime_data.time.dt @requires_dask - def test_dask_field_access(self): + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) + def test_dask_field_access(self, field): import dask.array as da - years = self.times_data.dt.year - months = self.times_data.dt.month - hours = self.times_data.dt.hour - days = self.times_data.dt.day - floor = self.times_data.dt.floor("D") - ceil = self.times_data.dt.ceil("D") - round = self.times_data.dt.round("D") - strftime = self.times_data.dt.strftime("%Y-%m-%d %H:%M:%S") + expected = getattr(self.times_data.dt, field) + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) + + @requires_dask + @pytest.mark.parametrize( + "method, parameters", + [ + ("floor", "D"), + ("ceil", "D"), + ("round", "D"), + ("strftime", "%Y-%m-%d %H:%M:%S"), + ], + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da + expected = getattr(self.times_data.dt, method)(parameters) dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) dask_times_2d = xr.DataArray( dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" ) - dask_year = dask_times_2d.dt.year - dask_month = dask_times_2d.dt.month - dask_day = dask_times_2d.dt.day - dask_hour = dask_times_2d.dt.hour - dask_floor = dask_times_2d.dt.floor("D") - dask_ceil = dask_times_2d.dt.ceil("D") - dask_round = dask_times_2d.dt.round("D") - dask_strftime = dask_times_2d.dt.strftime("%Y-%m-%d %H:%M:%S") - - # Test that the data isn't eagerly evaluated - assert isinstance(dask_year.data, da.Array) - assert isinstance(dask_month.data, da.Array) - assert isinstance(dask_day.data, da.Array) - assert isinstance(dask_hour.data, da.Array) - assert isinstance(dask_strftime.data, da.Array) - - # Double check that outcome chunksize is unchanged - dask_chunks = dask_times_2d.chunks - assert dask_year.data.chunks == dask_chunks - assert dask_month.data.chunks == dask_chunks - assert dask_day.data.chunks == dask_chunks - assert dask_hour.data.chunks == dask_chunks - assert dask_strftime.data.chunks == dask_chunks - - # Check the actual output from the accessors - assert_equal(years, dask_year.compute()) - assert_equal(months, dask_month.compute()) - assert_equal(days, dask_day.compute()) - assert_equal(hours, dask_hour.compute()) - assert_equal(floor, dask_floor.compute()) - assert_equal(ceil, dask_ceil.compute()) - assert_equal(round, dask_round.compute()) - assert_equal(strftime, dask_strftime.compute()) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) def test_seasons(self): dates = pd.date_range(start="2000/01/01", freq="M", periods=12) @@ -140,12 +175,108 @@ def test_seasons(self): assert_array_equal(seasons.values, dates.dt.season.values) - def test_rounders(self): + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_method(self, method, parameters): dates = pd.date_range("2014-01-01", "2014-05-01", freq="H") - xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates]) - assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values) - assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values) - assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) + + +class TestTimedeltaAccessor: + @pytest.fixture(autouse=True) + def setup(self): + nt = 100 + data = np.random.rand(10, 10, nt) + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + self.times = pd.timedelta_range(start="1 day", freq="6H", periods=nt) + + self.data = xr.DataArray( + data, + coords=[lons, lats, self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + self.times_arr = np.random.choice(self.times, size=(10, 10, nt)) + self.times_data = xr.DataArray( + self.times_arr, + coords=[lons, lats, self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + def test_not_datetime_type(self): + nontime_data = self.data.copy() + int_data = np.arange(len(self.data.time)).astype("int8") + nontime_data["time"].values = int_data + with raises_regex(TypeError, "dt"): + nontime_data.time.dt + + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] + ) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) + + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_methods(self, method, parameters): + dates = pd.timedelta_range(start="1 day", end="30 days", freq="6H") + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) + + @requires_dask + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_dask_field_access(self, field): + import dask.array as da + + expected = getattr(self.times_data.dt, field) + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual, expected) + + @requires_dask + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da + + expected = getattr(self.times_data.dt, method)(parameters) + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) _CFTIME_CALENDARS = [