From c8a9fe1f780cf2cee1988b9e5c788c62839d2d11 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 11 Dec 2019 17:52:30 -0700 Subject: [PATCH 01/14] Support `.dt` accessor for Timedelta --- xarray/core/accessor_dt.py | 232 +++++++++++++++++++------------ xarray/core/common.py | 6 + xarray/core/dataarray.py | 4 +- xarray/tests/test_accessor_dt.py | 114 ++++++++++++++- 4 files changed, 264 insertions(+), 92 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index aff6fbc6691..c9078da96c4 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -1,7 +1,11 @@ import numpy as np import pandas as pd -from .common import _contains_datetime_like_objects, is_np_datetime_like +from .common import ( + _contains_datetime_like_objects, + is_np_datetime_like, + _is_timedelta64_dtype, +) from .pycompat import dask_array_type @@ -145,97 +149,10 @@ def _strftime(values, date_format): return access_method(values, date_format) -class DatetimeAccessor: - """Access datetime fields for DataArrays with datetime-like dtypes. - - Similar to pandas, fields can be accessed through the `.dt` attribute - for applicable DataArrays: - - >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', - ... freq='D', periods=100)}) - >>> ds.time.dt - - >>> ds.time.dt.dayofyear[:5] - - array([1, 2, 3, 4, 5], dtype=int32) - Coordinates: - * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... - - All of the pandas fields are accessible here. Note that these fields are - not calendar-aware; if your datetimes are encoded with a non-Gregorian - calendar (e.g. a 360-day calendar) using cftime, then some fields like - `dayofyear` may not be accurate. - - """ - +class Properties: def __init__(self, obj): - if not _contains_datetime_like_objects(obj): - raise TypeError( - "'dt' accessor only available for " - "DataArray with datetime64 timedelta64 dtype or " - "for arrays containing cftime datetime " - "objects." 
- ) self._obj = obj - def _tslib_field_accessor( # type: ignore - name: str, docstring: str = None, dtype: np.dtype = None - ): - def f(self, dtype=dtype): - if dtype is None: - dtype = self._obj.dtype - obj_type = type(self._obj) - result = _get_date_field(self._obj.data, name, dtype) - return obj_type( - result, name=name, coords=self._obj.coords, dims=self._obj.dims - ) - - f.__name__ = name - f.__doc__ = docstring - return property(f) - - year = _tslib_field_accessor("year", "The year of the datetime", np.int64) - month = _tslib_field_accessor( - "month", "The month as January=1, December=12", np.int64 - ) - day = _tslib_field_accessor("day", "The days of the datetime", np.int64) - hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64) - minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64) - second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64) - microsecond = _tslib_field_accessor( - "microsecond", "The microseconds of the datetime", np.int64 - ) - nanosecond = _tslib_field_accessor( - "nanosecond", "The nanoseconds of the datetime", np.int64 - ) - weekofyear = _tslib_field_accessor( - "weekofyear", "The week ordinal of the year", np.int64 - ) - week = weekofyear - dayofweek = _tslib_field_accessor( - "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 - ) - weekday = dayofweek - - weekday_name = _tslib_field_accessor( - "weekday_name", "The name of day in a week (ex: Friday)", object - ) - - dayofyear = _tslib_field_accessor( - "dayofyear", "The ordinal day of the year", np.int64 - ) - quarter = _tslib_field_accessor("quarter", "The quarter of the date") - days_in_month = _tslib_field_accessor( - "days_in_month", "The number of days in the month", np.int64 - ) - daysinmonth = days_in_month - - season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object) - - time = _tslib_field_accessor( - "time", "Timestamps corresponding to datetimes", object - 
) - def _tslib_round_accessor(self, name, freq): obj_type = type(self._obj) result = _round_field(self._obj.data, name, freq) @@ -323,3 +240,140 @@ def strftime(self, date_format): return obj_type( result, name="strftime", coords=self._obj.coords, dims=self._obj.dims ) + + +class DatetimeProperties(Properties): + """Access datetime fields for DataArrays with datetime-like dtypes. + + Similar to pandas, fields can be accessed through the `.dt` attribute + for applicable DataArrays: + + >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', + ... freq='D', periods=100)}) + >>> ds.time.dt + + >>> ds.time.dt.dayofyear[:5] + + array([1, 2, 3, 4, 5], dtype=int32) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... + + All of the pandas fields are accessible here. Note that these fields are + not calendar-aware; if your datetimes are encoded with a non-Gregorian + calendar (e.g. a 360-day calendar) using cftime, then some fields like + `dayofyear` may not be accurate. 
+ + """ + + def _tslib_field_accessor( # type: ignore + name: str, docstring: str = None, dtype: np.dtype = None + ): + def f(self, dtype=dtype): + if dtype is None: + dtype = self._obj.dtype + obj_type = type(self._obj) + result = _get_date_field(self._obj.data, name, dtype) + return obj_type( + result, name=name, coords=self._obj.coords, dims=self._obj.dims + ) + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + year = _tslib_field_accessor("year", "The year of the datetime", np.int64) + month = _tslib_field_accessor( + "month", "The month as January=1, December=12", np.int64 + ) + day = _tslib_field_accessor("day", "The days of the datetime", np.int64) + hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64) + minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64) + second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64) + microsecond = _tslib_field_accessor( + "microsecond", "The microseconds of the datetime", np.int64 + ) + nanosecond = _tslib_field_accessor( + "nanosecond", "The nanoseconds of the datetime", np.int64 + ) + weekofyear = _tslib_field_accessor( + "weekofyear", "The week ordinal of the year", np.int64 + ) + week = weekofyear + dayofweek = _tslib_field_accessor( + "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 + ) + weekday = dayofweek + + weekday_name = _tslib_field_accessor( + "weekday_name", "The name of day in a week (ex: Friday)", object + ) + + dayofyear = _tslib_field_accessor( + "dayofyear", "The ordinal day of the year", np.int64 + ) + quarter = _tslib_field_accessor("quarter", "The quarter of the date") + days_in_month = _tslib_field_accessor( + "days_in_month", "The number of days in the month", np.int64 + ) + daysinmonth = days_in_month + + season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object) + + time = _tslib_field_accessor( + "time", "Timestamps corresponding to datetimes", object + ) + + 
+class TimedeltaProperties(Properties): + def _tslib_field_accessor( # type: ignore + name: str, docstring: str = None, dtype: np.dtype = None + ): + def f(self, dtype=dtype): + if dtype is None: + dtype = self._obj.dtype + obj_type = type(self._obj) + result = _get_date_field(self._obj.data, name, dtype) + return obj_type( + result, name=name, coords=self._obj.coords, dims=self._obj.dims + ) + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + seconds = _tslib_field_accessor( + "seconds", + "Number of seconds (>= 0 and less than 1 day) for each element.", + np.int64, + ) + days = _tslib_field_accessor("days", "Number of days for each element.", np.int64) + microseconds = _tslib_field_accessor( + "microseconds", + "Number of microseconds (>= 0 and less than 1 second) for each element.", + np.int64, + ) + nanoseconds = _tslib_field_accessor( + "nanoseconds", + "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.", + np.int64, + ) + + +class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): + def __new__(cls, obj): + # CombinedDatetimelikeProperites isn't really instatiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. + if not _contains_datetime_like_objects(obj): + raise TypeError( + "'.dt' accessor only available for " + "DataArray with datetime64 timedelta64 dtype or " + "for arrays containing cftime datetime " + "objects." 
+ ) + + if _is_timedelta64_dtype(obj.dtype): + return TimedeltaProperties(obj) + else: + return DatetimeProperties(obj) diff --git a/xarray/core/common.py b/xarray/core/common.py index a74318b2f90..de9f7382463 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1477,3 +1477,9 @@ def _contains_datetime_like_objects(var) -> bool: np.datetime64, np.timedelta64, or cftime.datetime) """ return is_np_datetime_like(var.dtype) or contains_cftime_datetimes(var) + + +def _is_timedelta64_dtype(dtype: DTypeLike) -> bool: + """Check whether dtype is of the timedelta64 dtype. + """ + return np.issubdtype(dtype, np.timedelta64) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b649df6dd56..81ae9fbce57 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -33,7 +33,7 @@ rolling, utils, ) -from .accessor_dt import DatetimeAccessor +from .accessor_dt import CombinedDatetimelikeProperties from .accessor_str import StringAccessor from .alignment import ( _broadcast_helper, @@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - dt = property(DatetimeAccessor) + dt = property(CombinedDatetimelikeProperties) def __init__( self, diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 5fe5b8c3f59..a13c9c71d41 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -13,7 +13,7 @@ ) -class TestDatetimeAccessor: +class TestDatetimelikeProperties: @pytest.fixture(autouse=True) def setup(self): nt = 100 @@ -148,6 +148,118 @@ def test_rounders(self): assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) +class TestTimedeltaProperties: + @pytest.fixture(autouse=True) + def setup(self): + nt = 100 + data = np.random.rand(10, 10, nt) + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + self.times = pd.date_range(start="2000/01/01", freq="H", 
periods=nt) + self.times = self.times - self.times[0] + + self.data = xr.DataArray( + data, + coords=[lons, lats, self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + self.times_arr = np.random.choice(self.times, size=(10, 10, nt)) + self.times_data = xr.DataArray( + self.times_arr, + coords=[lons, lats, self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + def test_field_access(self): + days = xr.DataArray( + self.times.days, name="days", coords=[self.times], dims=["time"] + ) + seconds = xr.DataArray( + self.times.seconds, name="seconds", coords=[self.times], dims=["time"] + ) + microseconds = xr.DataArray( + self.times.microseconds, + name="microseconds", + coords=[self.times], + dims=["time"], + ) + nanoseconds = xr.DataArray( + self.times.nanoseconds, + name="nanoseconds", + coords=[self.times], + dims=["time"], + ) + + assert_equal(days, self.data.time.dt.days) + assert_equal(seconds, self.data.time.dt.seconds) + assert_equal(microseconds, self.data.time.dt.microseconds) + assert_equal(nanoseconds, self.data.time.dt.nanoseconds) + + def test_not_datetime_type(self): + nontime_data = self.data.copy() + int_data = np.arange(len(self.data.time)).astype("int8") + nontime_data["time"].values = int_data + with raises_regex(TypeError, "dt"): + nontime_data.time.dt + + @requires_dask + def test_dask_field_access(self): + import dask.array as da + + days = self.times_data.dt.days + seconds = self.times_data.dt.seconds + microseconds = self.times_data.dt.microseconds + nanoseconds = self.times_data.dt.nanoseconds + floor = self.times_data.dt.floor("D") + ceil = self.times_data.dt.ceil("D") + round = self.times_data.dt.round("D") + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + dask_days = dask_times_2d.dt.days + dask_seconds = dask_times_2d.dt.seconds + dask_microseconds = dask_times_2d.dt.microseconds + 
dask_nanoseconds = dask_times_2d.dt.nanoseconds + dask_floor = dask_times_2d.dt.floor("D") + dask_ceil = dask_times_2d.dt.ceil("D") + dask_round = dask_times_2d.dt.round("D") + + # Test that the data isn't eagerly evaluated + assert isinstance(dask_days.data, da.Array) + assert isinstance(dask_seconds.data, da.Array) + assert isinstance(dask_microseconds.data, da.Array) + assert isinstance(dask_nanoseconds.data, da.Array) + + # Double check that outcome chunksize is unchanged + dask_chunks = dask_times_2d.chunks + assert dask_days.data.chunks == dask_chunks + assert dask_seconds.data.chunks == dask_chunks + assert dask_microseconds.data.chunks == dask_chunks + assert dask_nanoseconds.data.chunks == dask_chunks + + # Check the actual output from the accessors + assert_equal(days, dask_days.compute()) + assert_equal(seconds, dask_seconds.compute()) + assert_equal(microseconds, dask_microseconds.compute()) + assert_equal(nanoseconds, dask_nanoseconds.compute()) + assert_equal(floor, dask_floor.compute()) + assert_equal(ceil, dask_ceil.compute()) + assert_equal(round, dask_round.compute()) + + def test_rounders(self): + dates = pd.date_range("2014-01-01", "2014-05-01", freq="H") + dates = dates - dates[0] + xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates]) + assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values) + assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values) + assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) + + _CFTIME_CALENDARS = [ "365_day", "360_day", From 1f15409305e79ed42528ee657e0c3520d9a73f57 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 09:56:19 -0700 Subject: [PATCH 02/14] Rename accessors --- xarray/core/accessor_dt.py | 10 +++++----- xarray/core/dataarray.py | 4 ++-- xarray/tests/test_accessor_dt.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py 
index c9078da96c4..008fc0c2804 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -242,7 +242,7 @@ def strftime(self, date_format): ) -class DatetimeProperties(Properties): +class DatetimeAccessor(Properties): """Access datetime fields for DataArrays with datetime-like dtypes. Similar to pandas, fields can be accessed through the `.dt` attribute @@ -324,7 +324,7 @@ def f(self, dtype=dtype): ) -class TimedeltaProperties(Properties): +class TimedeltaAccessor(Properties): def _tslib_field_accessor( # type: ignore name: str, docstring: str = None, dtype: np.dtype = None ): @@ -359,7 +359,7 @@ def f(self, dtype=dtype): ) -class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): +class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor): def __new__(cls, obj): # CombinedDatetimelikeProperites isn't really instatiated. Instead # we need to choose which parent (datetime or timedelta) is @@ -374,6 +374,6 @@ def __new__(cls, obj): ) if _is_timedelta64_dtype(obj.dtype): - return TimedeltaProperties(obj) + return TimedeltaAccessor(obj) else: - return DatetimeProperties(obj) + return DatetimeAccessor(obj) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 81ae9fbce57..b51b03880d4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -33,7 +33,7 @@ rolling, utils, ) -from .accessor_dt import CombinedDatetimelikeProperties +from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( _broadcast_helper, @@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - dt = property(CombinedDatetimelikeProperties) + dt = property(CombinedDatetimelikeAccessor) def __init__( self, diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index a13c9c71d41..9eb0c65d7d7 100644 --- a/xarray/tests/test_accessor_dt.py +++ 
b/xarray/tests/test_accessor_dt.py @@ -148,7 +148,7 @@ def test_rounders(self): assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) -class TestTimedeltaProperties: +class TestTimedeltaAccessor: @pytest.fixture(autouse=True) def setup(self): nt = 100 From efbe6b9dcb9ec068dc8c45c4b424028373cb4363 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 09:59:45 -0700 Subject: [PATCH 03/14] Use `is_np_timedelta_like` for consistency --- xarray/core/accessor_dt.py | 4 ++-- xarray/core/common.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 008fc0c2804..a20319bf5a6 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -4,7 +4,7 @@ from .common import ( _contains_datetime_like_objects, is_np_datetime_like, - _is_timedelta64_dtype, + is_np_timedelta_like, ) from .pycompat import dask_array_type @@ -373,7 +373,7 @@ def __new__(cls, obj): "objects." ) - if _is_timedelta64_dtype(obj.dtype): + if is_np_timedelta_like(obj.dtype): return TimedeltaAccessor(obj) else: return DatetimeAccessor(obj) diff --git a/xarray/core/common.py b/xarray/core/common.py index de9f7382463..e908c69dd14 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool: return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64) +def is_np_timedelta_like(dtype: DTypeLike) -> bool: + """Check whether dtype is of the timedelta64 dtype. 
+ """ + return np.issubdtype(dtype, np.timedelta64) + + def _contains_cftime_datetimes(array) -> bool: """Check if an array contains cftime.datetime objects """ @@ -1477,9 +1483,3 @@ def _contains_datetime_like_objects(var) -> bool: np.datetime64, np.timedelta64, or cftime.datetime) """ return is_np_datetime_like(var.dtype) or contains_cftime_datetimes(var) - - -def _is_timedelta64_dtype(dtype: DTypeLike) -> bool: - """Check whether dtype is of the timedelta64 dtype. - """ - return np.issubdtype(dtype, np.timedelta64) From 60ba7d825d210df7a23dee92124b21c24844790f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 10:08:50 -0700 Subject: [PATCH 04/14] Use `pd.timedelta_range` --- xarray/tests/test_accessor_dt.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 9eb0c65d7d7..1742e5a4699 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -13,7 +13,7 @@ ) -class TestDatetimelikeProperties: +class TestDatetimeAccessor: @pytest.fixture(autouse=True) def setup(self): nt = 100 @@ -155,8 +155,7 @@ def setup(self): data = np.random.rand(10, 10, nt) lons = np.linspace(0, 11, 10) lats = np.linspace(0, 20, 10) - self.times = pd.date_range(start="2000/01/01", freq="H", periods=nt) - self.times = self.times - self.times[0] + self.times = pd.timedelta_range(start="1 day", freq="6H", periods=nt) self.data = xr.DataArray( data, @@ -252,8 +251,7 @@ def test_dask_field_access(self): assert_equal(round, dask_round.compute()) def test_rounders(self): - dates = pd.date_range("2014-01-01", "2014-05-01", freq="H") - dates = dates - dates[0] + dates = pd.timedelta_range(start="1 day", end="30 days", freq="6H") xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates]) assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values) assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values) 
From aefb4bc160af2ae3ae16cdec052c610a03e0f42f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 10:50:55 -0700 Subject: [PATCH 05/14] Move shared method to Properties --- xarray/core/accessor_dt.py | 100 ++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index a20319bf5a6..d98edf33f50 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -153,6 +153,22 @@ class Properties: def __init__(self, obj): self._obj = obj + def _tslib_field_accessor( # type: ignore + name: str, docstring: str = None, dtype: np.dtype = None + ): + def f(self, dtype=dtype): + if dtype is None: + dtype = self._obj.dtype + obj_type = type(self._obj) + result = _get_date_field(self._obj.data, name, dtype) + return obj_type( + result, name=name, coords=self._obj.coords, dims=self._obj.dims + ) + + f.__name__ = name + f.__doc__ = docstring + return property(f) + def _tslib_round_accessor(self, name, freq): obj_type = type(self._obj) result = _round_field(self._obj.data, name, freq) @@ -265,94 +281,74 @@ class DatetimeAccessor(Properties): """ - def _tslib_field_accessor( # type: ignore - name: str, docstring: str = None, dtype: np.dtype = None - ): - def f(self, dtype=dtype): - if dtype is None: - dtype = self._obj.dtype - obj_type = type(self._obj) - result = _get_date_field(self._obj.data, name, dtype) - return obj_type( - result, name=name, coords=self._obj.coords, dims=self._obj.dims - ) - - f.__name__ = name - f.__doc__ = docstring - return property(f) - - year = _tslib_field_accessor("year", "The year of the datetime", np.int64) - month = _tslib_field_accessor( + year = Properties._tslib_field_accessor( + "year", "The year of the datetime", np.int64 + ) + month = Properties._tslib_field_accessor( "month", "The month as January=1, December=12", np.int64 ) - day = _tslib_field_accessor("day", "The days of the datetime", np.int64) - hour = 
_tslib_field_accessor("hour", "The hours of the datetime", np.int64) - minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64) - second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64) - microsecond = _tslib_field_accessor( + day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64) + hour = Properties._tslib_field_accessor( + "hour", "The hours of the datetime", np.int64 + ) + minute = Properties._tslib_field_accessor( + "minute", "The minutes of the datetime", np.int64 + ) + second = Properties._tslib_field_accessor( + "second", "The seconds of the datetime", np.int64 + ) + microsecond = Properties._tslib_field_accessor( "microsecond", "The microseconds of the datetime", np.int64 ) - nanosecond = _tslib_field_accessor( + nanosecond = Properties._tslib_field_accessor( "nanosecond", "The nanoseconds of the datetime", np.int64 ) - weekofyear = _tslib_field_accessor( + weekofyear = Properties._tslib_field_accessor( "weekofyear", "The week ordinal of the year", np.int64 ) week = weekofyear - dayofweek = _tslib_field_accessor( + dayofweek = Properties._tslib_field_accessor( "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 ) weekday = dayofweek - weekday_name = _tslib_field_accessor( + weekday_name = Properties._tslib_field_accessor( "weekday_name", "The name of day in a week (ex: Friday)", object ) - dayofyear = _tslib_field_accessor( + dayofyear = Properties._tslib_field_accessor( "dayofyear", "The ordinal day of the year", np.int64 ) - quarter = _tslib_field_accessor("quarter", "The quarter of the date") - days_in_month = _tslib_field_accessor( + quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date") + days_in_month = Properties._tslib_field_accessor( "days_in_month", "The number of days in the month", np.int64 ) daysinmonth = days_in_month - season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object) + season = 
Properties._tslib_field_accessor( + "season", "Season of the year (ex: DJF)", object + ) - time = _tslib_field_accessor( + time = Properties._tslib_field_accessor( "time", "Timestamps corresponding to datetimes", object ) class TimedeltaAccessor(Properties): - def _tslib_field_accessor( # type: ignore - name: str, docstring: str = None, dtype: np.dtype = None - ): - def f(self, dtype=dtype): - if dtype is None: - dtype = self._obj.dtype - obj_type = type(self._obj) - result = _get_date_field(self._obj.data, name, dtype) - return obj_type( - result, name=name, coords=self._obj.coords, dims=self._obj.dims - ) - - f.__name__ = name - f.__doc__ = docstring - return property(f) - - seconds = _tslib_field_accessor( + seconds = Properties._tslib_field_accessor( "seconds", "Number of seconds (>= 0 and less than 1 day) for each element.", np.int64, ) - days = _tslib_field_accessor("days", "Number of days for each element.", np.int64) - microseconds = _tslib_field_accessor( + days = Properties._tslib_field_accessor( + "days", "Number of days for each element.", np.int64 + ) + microseconds = Properties._tslib_field_accessor( "microseconds", "Number of microseconds (>= 0 and less than 1 second) for each element.", np.int64, ) - nanoseconds = _tslib_field_accessor( + nanoseconds = Properties._tslib_field_accessor( "nanoseconds", "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.", np.int64, From 84efe006ab133241dad786870f3b48aefbb9a0d1 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 11:43:54 -0700 Subject: [PATCH 06/14] Parametrize field access test --- xarray/tests/test_accessor_dt.py | 42 ++++++++++++-------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 1742e5a4699..449ba51d319 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -12,6 +12,8 @@ requires_dask, ) +from .test_dask import 
raise_if_dask_computes, assert_chunks_equal + class TestDatetimeAccessor: @pytest.fixture(autouse=True) @@ -205,13 +207,14 @@ def test_not_datetime_type(self): nontime_data.time.dt @requires_dask - def test_dask_field_access(self): + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_dask_field_access(self, field): import dask.array as da - days = self.times_data.dt.days - seconds = self.times_data.dt.seconds - microseconds = self.times_data.dt.microseconds - nanoseconds = self.times_data.dt.nanoseconds + expected = getattr(self.times_data.dt, field) + floor = self.times_data.dt.floor("D") ceil = self.times_data.dt.ceil("D") round = self.times_data.dt.round("D") @@ -220,32 +223,17 @@ def test_dask_field_access(self): dask_times_2d = xr.DataArray( dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" ) - dask_days = dask_times_2d.dt.days - dask_seconds = dask_times_2d.dt.seconds - dask_microseconds = dask_times_2d.dt.microseconds - dask_nanoseconds = dask_times_2d.dt.nanoseconds + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual, expected) dask_floor = dask_times_2d.dt.floor("D") dask_ceil = dask_times_2d.dt.ceil("D") dask_round = dask_times_2d.dt.round("D") - # Test that the data isn't eagerly evaluated - assert isinstance(dask_days.data, da.Array) - assert isinstance(dask_seconds.data, da.Array) - assert isinstance(dask_microseconds.data, da.Array) - assert isinstance(dask_nanoseconds.data, da.Array) - - # Double check that outcome chunksize is unchanged - dask_chunks = dask_times_2d.chunks - assert dask_days.data.chunks == dask_chunks - assert dask_seconds.data.chunks == dask_chunks - assert dask_microseconds.data.chunks == dask_chunks - assert dask_nanoseconds.data.chunks == dask_chunks - - # Check the actual output from the accessors - 
assert_equal(days, dask_days.compute()) - assert_equal(seconds, dask_seconds.compute()) - assert_equal(microseconds, dask_microseconds.compute()) - assert_equal(nanoseconds, dask_nanoseconds.compute()) assert_equal(floor, dask_floor.compute()) assert_equal(ceil, dask_ceil.compute()) assert_equal(round, dask_round.compute()) From cc9a2205a435c62f2efabc82c6a7de575ed1f6a0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 11:47:47 -0700 Subject: [PATCH 07/14] move `strftime()` to `DatetimeAccessor` --- xarray/core/accessor_dt.py | 48 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index d98edf33f50..b1251b2d283 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -223,6 +223,30 @@ def round(self, freq): """ return self._tslib_round_accessor("round", freq) + +class DatetimeAccessor(Properties): + """Access datetime fields for DataArrays with datetime-like dtypes. + + Similar to pandas, fields can be accessed through the `.dt` attribute + for applicable DataArrays: + + >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', + ... freq='D', periods=100)}) + >>> ds.time.dt + + >>> ds.time.dt.dayofyear[:5] + + array([1, 2, 3, 4, 5], dtype=int32) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... + + All of the pandas fields are accessible here. Note that these fields are + not calendar-aware; if your datetimes are encoded with a non-Gregorian + calendar (e.g. a 360-day calendar) using cftime, then some fields like + `dayofyear` may not be accurate. 
+ + """ + def strftime(self, date_format): ''' Return an array of formatted strings specified by date_format, which @@ -257,30 +281,6 @@ def strftime(self, date_format): result, name="strftime", coords=self._obj.coords, dims=self._obj.dims ) - -class DatetimeAccessor(Properties): - """Access datetime fields for DataArrays with datetime-like dtypes. - - Similar to pandas, fields can be accessed through the `.dt` attribute - for applicable DataArrays: - - >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', - ... freq='D', periods=100)}) - >>> ds.time.dt - - >>> ds.time.dt.dayofyear[:5] - - array([1, 2, 3, 4, 5], dtype=int32) - Coordinates: - * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... - - All of the pandas fields are accessible here. Note that these fields are - not calendar-aware; if your datetimes are encoded with a non-Gregorian - calendar (e.g. a 360-day calendar) using cftime, then some fields like - `dayofyear` may not be accurate. - - """ - year = Properties._tslib_field_accessor( "year", "The year of the datetime", np.int64 ) From c94f1b3b6aa16126209146e6270db6c446ca414a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 12:14:13 -0700 Subject: [PATCH 08/14] Update the documentation --- doc/api.rst | 1 + xarray/core/accessor_dt.py | 97 +++++++++++++++++++++++++++++++------- 2 files changed, 80 insertions(+), 18 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 8b523b7837c..2fa9ab8c2d2 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -593,6 +593,7 @@ Accessors :toctree: generated/ core.accessor_dt.DatetimeAccessor + core.accessor_dt.TimedeltaAccessor core.accessor_str.StringAccessor Custom Indexes diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index b1251b2d283..f4c72421c99 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -227,23 +227,43 @@ def round(self, freq): class DatetimeAccessor(Properties): """Access datetime fields for DataArrays with 
datetime-like dtypes. - Similar to pandas, fields can be accessed through the `.dt` attribute - for applicable DataArrays: - - >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', - ... freq='D', periods=100)}) - >>> ds.time.dt - - >>> ds.time.dt.dayofyear[:5] - - array([1, 2, 3, 4, 5], dtype=int32) - Coordinates: - * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... - - All of the pandas fields are accessible here. Note that these fields are - not calendar-aware; if your datetimes are encoded with a non-Gregorian - calendar (e.g. a 360-day calendar) using cftime, then some fields like - `dayofyear` may not be accurate. + Fields can be accessed through the `.dt` attribute + for applicable DataArrays. + + Notes + ------ + Note that these fields are not calendar-aware; if your datetimes are encoded + with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime, + then some fields like `dayofyear` may not be accurate. + + Examples + --------- + >>> import xarray as xr + >>> import pandas as pd + >>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10) + >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts + + array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000', + '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000', + '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000', + '2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000', + '2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'], + dtype='datetime64[ns]') + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + >>> ts.dt + + >>> ts.dt.dayofyear + + array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + >>> ts.dt.quarter + + array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 
2000-01-10 """ @@ -335,6 +355,47 @@ def strftime(self, date_format): class TimedeltaAccessor(Properties): + """Access Timedelta fields for DataArrays with Timedelta-like dtypes. + + Fields can be accessed through the `.dt` attribute for applicable DataArrays. + + Examples + -------- + >>> import pandas as pd + >>> import xarray as xr + >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20) + >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts + + array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000, + 172800000000000, 194400000000000, 216000000000000, 237600000000000, + 259200000000000, 280800000000000, 302400000000000, 324000000000000, + 345600000000000, 367200000000000, 388800000000000, 410400000000000, + 432000000000000, 453600000000000, 475200000000000, 496800000000000], + dtype='timedelta64[ns]') + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + >>> ts.dt + + >>> ts.dt.days + + array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5]) + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + >>> ts.dt.microseconds + + array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + >>> ts.dt.seconds + + array([ 0, 21600, 43200, 64800, 0, 21600, 43200, 64800, 0, + 21600, 43200, 64800, 0, 21600, 43200, 64800, 0, 21600, + 43200, 64800]) + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + """ + seconds = Properties._tslib_field_accessor( "seconds", "Number of seconds (>= 0 and less than 1 day) for each element.", @@ -357,7 +418,7 @@ class TimedeltaAccessor(Properties): class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor): def __new__(cls, obj): - # CombinedDatetimelikeProperites isn't really instatiated. Instead + # CombinedDatetimelikeAccessor isn't really instantiated. 
Instead # we need to choose which parent (datetime or timedelta) is # appropriate. Since we're checking the dtypes anyway, we'll just # do all the validation here. From 2b3c43827cdd93aad057a7c474d8a315f04a6efc Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 12:20:01 -0700 Subject: [PATCH 09/14] Update `whats-new.rst` --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1f60d457432..6f2293818a6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,6 +31,8 @@ New Features - Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ +- Support `.dt` accessor for timedelta via :py:class:`core.accessor_dt.TimedeltaAccessor` + By `Anderson Banihirwe `_. Bug fixes ~~~~~~~~~ From d4fcb04698fb52c0644efe77777ef28a1ba3c3e3 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 12:21:29 -0700 Subject: [PATCH 10/14] Add PR reference --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6f2293818a6..99f251e173f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,7 +31,8 @@ New Features - Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ -- Support `.dt` accessor for timedelta via :py:class:`core.accessor_dt.TimedeltaAccessor` +- Support `.dt` accessor for timedelta via + :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) By `Anderson Banihirwe `_. 
Bug fixes From 767dd0d4c0f35b9a2a618726c7fc2281730849fc Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 15:31:18 -0700 Subject: [PATCH 11/14] Parametrize tests --- xarray/tests/test_accessor_dt.py | 193 ++++++++++++++----------------- 1 file changed, 88 insertions(+), 105 deletions(-) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 449ba51d319..c6cf2f1d0da 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -39,24 +39,13 @@ def setup(self): name="data", ) - def test_field_access(self): - years = xr.DataArray( - self.times.year, name="year", coords=[self.times], dims=["time"] + @pytest.mark.parametrize("field", ["year", "month", "day", "hour"]) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] ) - months = xr.DataArray( - self.times.month, name="month", coords=[self.times], dims=["time"] - ) - days = xr.DataArray( - self.times.day, name="day", coords=[self.times], dims=["time"] - ) - hours = xr.DataArray( - self.times.hour, name="hour", coords=[self.times], dims=["time"] - ) - - assert_equal(years, self.data.time.dt.year) - assert_equal(months, self.data.time.dt.month) - assert_equal(days, self.data.time.dt.day) - assert_equal(hours, self.data.time.dt.hour) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) def test_strftime(self): assert ( @@ -71,55 +60,49 @@ def test_not_datetime_type(self): nontime_data.time.dt @requires_dask - def test_dask_field_access(self): + @pytest.mark.parametrize("field", ["year", "month", "day", "hour"]) + def test_dask_field_access(self, field): import dask.array as da - years = self.times_data.dt.year - months = self.times_data.dt.month - hours = self.times_data.dt.hour - days = self.times_data.dt.day - floor = self.times_data.dt.floor("D") - ceil = self.times_data.dt.ceil("D") - round = self.times_data.dt.round("D") - 
strftime = self.times_data.dt.strftime("%Y-%m-%d %H:%M:%S") + expected = getattr(self.times_data.dt, field) + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual, expected) + + @requires_dask + @pytest.mark.parametrize( + "method, parameters", + [ + ("floor", "D"), + ("ceil", "D"), + ("round", "D"), + ("strftime", "%Y-%m-%d %H:%M:%S"), + ], + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da + expected = getattr(self.times_data.dt, method)(parameters) dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) dask_times_2d = xr.DataArray( dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" ) - dask_year = dask_times_2d.dt.year - dask_month = dask_times_2d.dt.month - dask_day = dask_times_2d.dt.day - dask_hour = dask_times_2d.dt.hour - dask_floor = dask_times_2d.dt.floor("D") - dask_ceil = dask_times_2d.dt.ceil("D") - dask_round = dask_times_2d.dt.round("D") - dask_strftime = dask_times_2d.dt.strftime("%Y-%m-%d %H:%M:%S") - - # Test that the data isn't eagerly evaluated - assert isinstance(dask_year.data, da.Array) - assert isinstance(dask_month.data, da.Array) - assert isinstance(dask_day.data, da.Array) - assert isinstance(dask_hour.data, da.Array) - assert isinstance(dask_strftime.data, da.Array) - - # Double check that outcome chunksize is unchanged - dask_chunks = dask_times_2d.chunks - assert dask_year.data.chunks == dask_chunks - assert dask_month.data.chunks == dask_chunks - assert dask_day.data.chunks == dask_chunks - assert dask_hour.data.chunks == dask_chunks - assert dask_strftime.data.chunks == dask_chunks - - # Check the actual output from the accessors - assert_equal(years, 
dask_year.compute()) - assert_equal(months, dask_month.compute()) - assert_equal(days, dask_day.compute()) - assert_equal(hours, dask_hour.compute()) - assert_equal(floor, dask_floor.compute()) - assert_equal(ceil, dask_ceil.compute()) - assert_equal(round, dask_round.compute()) - assert_equal(strftime, dask_strftime.compute()) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) def test_seasons(self): dates = pd.date_range(start="2000/01/01", freq="M", periods=12) @@ -142,12 +125,15 @@ def test_seasons(self): assert_array_equal(seasons.values, dates.dt.season.values) - def test_rounders(self): + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_method(self, method, parameters): dates = pd.date_range("2014-01-01", "2014-05-01", freq="H") - xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates]) - assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values) - assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values) - assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) class TestTimedeltaAccessor: @@ -174,31 +160,6 @@ def setup(self): name="data", ) - def test_field_access(self): - days = xr.DataArray( - self.times.days, name="days", coords=[self.times], dims=["time"] - ) - seconds = xr.DataArray( - self.times.seconds, name="seconds", coords=[self.times], dims=["time"] - ) - microseconds = xr.DataArray( - self.times.microseconds, - name="microseconds", - coords=[self.times], - dims=["time"], - ) - nanoseconds = xr.DataArray( - self.times.nanoseconds, - 
name="nanoseconds", - coords=[self.times], - dims=["time"], - ) - - assert_equal(days, self.data.time.dt.days) - assert_equal(seconds, self.data.time.dt.seconds) - assert_equal(microseconds, self.data.time.dt.microseconds) - assert_equal(nanoseconds, self.data.time.dt.nanoseconds) - def test_not_datetime_type(self): nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") @@ -206,6 +167,26 @@ def test_not_datetime_type(self): with raises_regex(TypeError, "dt"): nontime_data.time.dt + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] + ) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) + + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_methods(self, method, parameters): + dates = pd.timedelta_range(start="1 day", end="30 days", freq="6H") + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) + @requires_dask @pytest.mark.parametrize( "field", ["days", "seconds", "microseconds", "nanoseconds"] @@ -215,10 +196,6 @@ def test_dask_field_access(self, field): expected = getattr(self.times_data.dt, field) - floor = self.times_data.dt.floor("D") - ceil = self.times_data.dt.ceil("D") - round = self.times_data.dt.round("D") - dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) dask_times_2d = xr.DataArray( dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" @@ -230,20 +207,26 @@ def test_dask_field_access(self, field): assert isinstance(actual.data, da.Array) assert_chunks_equal(actual, dask_times_2d) assert_equal(actual, expected) - dask_floor = dask_times_2d.dt.floor("D") - dask_ceil = 
dask_times_2d.dt.ceil("D") - dask_round = dask_times_2d.dt.round("D") - assert_equal(floor, dask_floor.compute()) - assert_equal(ceil, dask_ceil.compute()) - assert_equal(round, dask_round.compute()) + @requires_dask + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da - def test_rounders(self): - dates = pd.timedelta_range(start="1 day", end="30 days", freq="6H") - xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates]) - assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values) - assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values) - assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) + expected = getattr(self.times_data.dt, method)(parameters) + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) _CFTIME_CALENDARS = [ From b8baad32ab5892517a797a872b23c1e30579adb6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 16:05:21 -0700 Subject: [PATCH 12/14] Extend DatetimeAccessor properties --- doc/whats-new.rst | 5 +-- xarray/core/accessor_dt.py | 28 ++++++++++++++++ xarray/tests/test_accessor_dt.py | 56 ++++++++++++++++++++++++++++++-- 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 99f251e173f..72b2f8eea6c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,8 +31,9 @@ New Features - Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` and :py:class:`DataArrayCoarsen` objects. 
(:pull:`3500`) By `Deepak Cherian `_ -- Support `.dt` accessor for timedelta via - :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) +- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties + and support `.dt` accessor for timedelta + via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) By `Anderson Banihirwe `_. Bug fixes diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index f4c72421c99..cbd44e47a9f 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -353,6 +353,34 @@ def strftime(self, date_format): "time", "Timestamps corresponding to datetimes", object ) + is_month_start = Properties._tslib_field_accessor( + "is_month_start", + "Indicates whether the date is the first day of the month.", + bool, + ) + is_month_end = Properties._tslib_field_accessor( + "is_month_end", "Indicates whether the date is the last day of the month.", bool + ) + is_quarter_start = Properties._tslib_field_accessor( + "is_quarter_start", + "Indicator for whether the date is the first day of a quarter.", + bool, + ) + is_quarter_end = Properties._tslib_field_accessor( + "is_quarter_end", + "Indicator for whether the date is the last day of a quarter.", + bool, + ) + is_year_start = Properties._tslib_field_accessor( + "is_year_start", "Indicate whether the date is the first day of a year.", bool + ) + is_year_end = Properties._tslib_field_accessor( + "is_year_end", "Indicate whether the date is the last day of the year.", bool + ) + is_leap_year = Properties._tslib_field_accessor( + "is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool + ) + class TimedeltaAccessor(Properties): """Access Timedelta fields for DataArrays with Timedelta-like dtypes. 
diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index c6cf2f1d0da..67ca12532c7 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -39,7 +39,32 @@ def setup(self): name="data", ) - @pytest.mark.parametrize("field", ["year", "month", "day", "hour"]) + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) def test_field_access(self, field): expected = xr.DataArray( getattr(self.times, field), name=field, coords=[self.times], dims=["time"] @@ -60,7 +85,32 @@ def test_not_datetime_type(self): nontime_data.time.dt @requires_dask - @pytest.mark.parametrize("field", ["year", "month", "day", "hour"]) + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) def test_dask_field_access(self, field): import dask.array as da @@ -76,7 +126,7 @@ def test_dask_field_access(self, field): assert isinstance(actual.data, da.Array) assert_chunks_equal(actual, dask_times_2d) - assert_equal(actual, expected) + assert_equal(actual.compute(), expected.compute()) @requires_dask @pytest.mark.parametrize( From 26fc6b9b066a114e357a76e3673e3873071223bf Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 16:12:39 -0700 Subject: [PATCH 13/14] Cleanup --- xarray/core/accessor_dt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/accessor_dt.py 
b/xarray/core/accessor_dt.py index cbd44e47a9f..bc995f7d4a1 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -424,14 +424,14 @@ class TimedeltaAccessor(Properties): * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 """ + days = Properties._tslib_field_accessor( + "days", "Number of days for each element.", np.int64 + ) seconds = Properties._tslib_field_accessor( "seconds", "Number of seconds (>= 0 and less than 1 day) for each element.", np.int64, ) - days = Properties._tslib_field_accessor( - "days", "Number of days for each element.", np.int64 - ) microseconds = Properties._tslib_field_accessor( "microseconds", "Number of microseconds (>= 0 and less than 1 second) for each element.", From ce565ee4ba4f79b34a38c16d4e427a0f3f98164d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Dec 2019 16:35:00 -0700 Subject: [PATCH 14/14] Fix docstring --- xarray/core/accessor_dt.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index bc995f7d4a1..c407371f9f0 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -333,7 +333,7 @@ def strftime(self, date_format): weekday = dayofweek weekday_name = Properties._tslib_field_accessor( - "weekday_name", "The name of day in a week (ex: Friday)", object + "weekday_name", "The name of day in a week", object ) dayofyear = Properties._tslib_field_accessor( @@ -345,9 +345,7 @@ def strftime(self, date_format): ) daysinmonth = days_in_month - season = Properties._tslib_field_accessor( - "season", "Season of the year (ex: DJF)", object - ) + season = Properties._tslib_field_accessor("season", "Season of the year", object) time = Properties._tslib_field_accessor( "time", "Timestamps corresponding to datetimes", object