Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: make Day preserve time-of-day across DST transitions #55502

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.tseries.offsets.Day.freqstr SA01" \
-i "pandas.tseries.offsets.Day.is_on_offset GL08" \
-i "pandas.tseries.offsets.Day.n GL08" \
-i "pandas.tseries.offsets.Day.nanos SA01" \
-i "pandas.tseries.offsets.Day.nanos GL08" \
-i "pandas.tseries.offsets.Day.normalize GL08" \
-i "pandas.tseries.offsets.Day.rule_code GL08" \
-i "pandas.tseries.offsets.Easter PR02" \
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/timedeltas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Further, operations among the scalars yield another scalar ``Timedelta``.

.. ipython:: python

pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta(
pd.Timedelta(pd.offsets.Hour(48)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta(
"00:00:00.000123"
)

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"Day",
"guess_datetime_format",
"add_overflowsafe",
"get_supported_dtype",
Expand Down Expand Up @@ -61,6 +62,7 @@
)
from pandas._libs.tslibs.offsets import (
BaseOffset,
Day,
Tick,
to_offset,
)
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/tslibs/offsets.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class BaseOffset:
def __getstate__(self): ...
@property
def nanos(self) -> int: ...
def _maybe_to_hours(self) -> BaseOffset: ...

def _get_offset(name: str) -> BaseOffset: ...

Expand All @@ -116,7 +117,9 @@ class Tick(SingleConstructorOffset):

def delta_to_tick(delta: timedelta) -> Tick: ...

class Day(Tick): ...
class Day(BaseOffset):
def _maybe_to_hours(self) -> Hour: ...

class Hour(Tick): ...
class Minute(Tick): ...
class Second(Tick): ...
Expand Down
74 changes: 60 additions & 14 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,11 @@ cdef class BaseOffset:
def nanos(self):
raise ValueError(f"{self} is a non-fixed frequency")

def _maybe_to_hours(self):
if not isinstance(self, Day):
return self
return Hour(self.n * 24)

# ------------------------------------------------------------------

def is_month_start(self, _Timestamp ts):
Expand Down Expand Up @@ -954,8 +959,6 @@ cdef class Tick(SingleConstructorOffset):
# Note: Without making this cpdef, we get AttributeError when calling
# from __mul__
cpdef Tick _next_higher_resolution(Tick self):
if type(self) is Day:
return Hour(self.n * 24)
if type(self) is Hour:
return Minute(self.n * 60)
if type(self) is Minute:
Expand Down Expand Up @@ -1102,7 +1105,7 @@ cdef class Tick(SingleConstructorOffset):
self.normalize = False


cdef class Day(Tick):
cdef class Day(SingleConstructorOffset):
"""
Offset ``n`` days.

Expand Down Expand Up @@ -1132,11 +1135,51 @@ cdef class Day(Tick):
>>> ts + Day(-4)
Timestamp('2022-12-05 15:00:00')
"""
_adjust_dst = True
_attributes = tuple(["n", "normalize"])
_nanos_inc = 24 * 3600 * 1_000_000_000
_prefix = "D"
_period_dtype_code = PeriodDtypeCode.D
_creso = NPY_DATETIMEUNIT.NPY_FR_D

def __init__(self, n=1, normalize=False):
BaseOffset.__init__(self, n)
if normalize:
# GH#21427
raise ValueError(
"Day offset with `normalize=True` are not allowed."
)

def is_on_offset(self, dt) -> bool:
return True

@apply_wraps
def _apply(self, other):
if isinstance(other, Day):
# TODO: why isn't this handled in __add__?
return Day(self.n + other.n)
return other + np.timedelta64(self.n, "D")

def _apply_array(self, dtarr):
return dtarr + np.timedelta64(self.n, "D")

@cache_readonly
def freqstr(self) -> str:
"""
Return a string representing the frequency.

Examples
--------
>>> pd.Day(5).freqstr
'5D'

>>> pd.offsets.Day(1).freqstr
'D'
"""
if self.n != 1:
return str(self.n) + "D"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if self.n == 0 do we still need to show "0D"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That’s what it is ATM. What else would you suggest?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK if it's the current behavior I suppose that's fine to keep it

return "D"


cdef class Hour(Tick):
"""
Expand Down Expand Up @@ -1360,16 +1403,13 @@ cdef class Nano(Tick):
def delta_to_tick(delta: timedelta) -> Tick:
if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0:
# nanoseconds only for pd.Timedelta
if delta.seconds == 0:
return Day(delta.days)
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(seconds / 3600)
elif seconds % 60 == 0:
return Minute(seconds / 60)
else:
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(seconds / 3600)
elif seconds % 60 == 0:
return Minute(seconds / 60)
else:
return Second(seconds)
return Second(seconds)
else:
nanos = delta_to_nanoseconds(delta)
if nanos % 1_000_000 == 0:
Expand Down Expand Up @@ -4838,7 +4878,7 @@ cpdef to_offset(freq, bint is_period=False):
<2 * BusinessDays>

>>> to_offset(pd.Timedelta(days=1))
<Day>
<24 * Hours>

>>> to_offset(pd.offsets.Hour())
<Hour>
Expand Down Expand Up @@ -4918,7 +4958,7 @@ cpdef to_offset(freq, bint is_period=False):
)
prefix = c_DEPR_ABBREVS[prefix]

if prefix in {"D", "h", "min", "s", "ms", "us", "ns"}:
if prefix in {"h", "min", "s", "ms", "us", "ns"}:
# For these prefixes, we have something like "3h" or
# "2.5min", so we can construct a Timedelta with the
# matching unit and get our offset from delta_to_tick
Expand All @@ -4936,6 +4976,12 @@ cpdef to_offset(freq, bint is_period=False):

if result is None:
result = offset
elif isinstance(result, Day) and isinstance(offset, Tick):
# e.g. "1D1H" is treated like "25H"
result = Hour(result.n * 24) + offset
elif isinstance(offset, Day) and isinstance(result, Tick):
# e.g. "1H1D" is treated like "25H"
result = result + Hour(offset.n * 24)
else:
result = result + offset
except (ValueError, TypeError) as err:
Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ from pandas._libs.tslibs.offsets cimport (
from pandas._libs.tslibs.offsets import (
INVALID_FREQ_ERR_MSG,
BDay,
Day,
)

cdef:
Expand Down Expand Up @@ -1823,6 +1824,10 @@ cdef class _Period(PeriodMixin):
# i.e. np.timedelta64("nat")
return NaT

if isinstance(other, Day):
# Periods are timezone-naive, so we treat Day as Tick-like
other = np.timedelta64(other.n, "D")

try:
inc = delta_to_nanoseconds(other, reso=self._dtype._creso, round_ok=False)
except ValueError as err:
Expand All @@ -1844,7 +1849,7 @@ cdef class _Period(PeriodMixin):

@cython.overflowcheck(True)
def __add__(self, other):
if is_any_td_scalar(other):
if is_any_td_scalar(other) or isinstance(other, Day):
return self._add_timedeltalike_scalar(other)
elif is_offset_object(other):
return self._add_offset(other)
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2388,6 +2388,7 @@ cdef bint _should_cast_to_timedelta(object obj):
cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1:
from pandas._libs.tslibs.offsets import to_offset

freq = to_offset(freq)
# In this context it is unambiguous that "D" represents 24 hours
freq = to_offset(freq)._maybe_to_hours()
freq.nanos # raises on non-fixed freq
return delta_to_nanoseconds(freq, creso)
25 changes: 23 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)
from pandas._libs.tslibs import (
BaseOffset,
Day,
IncompatibleFrequency,
NaT,
NaTType,
Expand Down Expand Up @@ -904,14 +905,21 @@ def inferred_freq(self) -> str | None:
TimedeltaIndex(['0 days', '10 days', '20 days'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.inferred_freq
'10D'
'240h'
"""
if self.ndim != 1:
return None
try:
return frequencies.infer_freq(self)
res = frequencies.infer_freq(self)
except ValueError:
return None
if self.dtype.kind == "m" and res is not None and res.endswith("D"):
# TimedeltaArray freq must be a Tick, so we convert the inferred
# daily freq to hourly.
if res == "D":
return "24h"
res = str(int(res[:-1]) * 24) + "h"
return res

@property # NB: override with cache_readonly in immutable subclasses
def _resolution_obj(self) -> Resolution | None:
Expand Down Expand Up @@ -1052,6 +1060,10 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
elif isinstance(self.freq, Tick):
# In these cases
return self.freq
elif isinstance(self.freq, Day) and getattr(self, "tz", None) is None:
return self.freq
# TODO: are there tzaware cases when we can reliably preserve freq?
# We have a bunch of tests that seem to think so
return None

@final
Expand Down Expand Up @@ -1147,6 +1159,10 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
res_m8 = res_values.view(f"timedelta64[{self.unit}]")

new_freq = self._get_arithmetic_result_freq(other)
if new_freq is not None:
# TODO: are we sure this is right?
new_freq = new_freq._maybe_to_hours()

new_freq = cast("Tick | None", new_freq)
return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)

Expand Down Expand Up @@ -1988,6 +2004,8 @@ def _maybe_pin_freq(self, freq, validate_kwds: dict) -> None:
# We cannot inherit a freq from the data, so we need to validate
# the user-passed freq
freq = to_offset(freq)
if self.dtype.kind == "m":
freq = freq._maybe_to_hours()
type(self)._validate_frequency(self, freq, **validate_kwds)
self._freq = freq
else:
Expand Down Expand Up @@ -2237,6 +2255,9 @@ def _with_freq(self, freq) -> Self:
assert freq == "infer"
freq = to_offset(self.inferred_freq)

if self.dtype.kind == "m" and freq is not None:
assert isinstance(freq, Tick)

arr = self.view()
arr._freq = freq
return arr
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,10 @@ def _generate_range(
if end is not None:
end = end.tz_localize(None)

if isinstance(freq, Tick):
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
if isinstance(freq, Tick) or (tz is None and isinstance(freq, Day)):
i8values = generate_regular_range(
start, end, periods, freq._maybe_to_hours(), unit=unit
)
else:
xdr = _generate_range(
start=start, end=end, periods=periods, offset=freq, unit=unit
Expand Down Expand Up @@ -934,7 +936,14 @@ def tz_convert(self, tz) -> Self:

# No conversion since timestamps are all UTC to begin with
dtype = tz_to_dtype(tz, unit=self.unit)
return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
new_freq = self.freq
if self.freq is not None and self.freq._adjust_dst:
# TODO: in some cases we may be able to retain, e.g. if old and new
# tz are both fixed offsets, or if no DST-crossings occur.
# The latter is value-dependent behavior that we may want to avoid.
# Or could convert e.g. "D" to "24h", see GH#51716
new_freq = None
return self._simple_new(self._ndarray, dtype=dtype, freq=new_freq)

@dtl.ravel_compat
def tz_localize(
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import (
BaseOffset,
Day,
NaT,
NaTType,
Timedelta,
Expand Down Expand Up @@ -855,6 +856,9 @@ def _addsub_int_array_or_scalar(
def _add_offset(self, other: BaseOffset):
assert not isinstance(other, Tick)

if isinstance(other, Day):
return self + np.timedelta64(other.n, "D")

self._require_matching_freq(other, base=True)
return self._addsub_int_array_or_scalar(other.n, operator.add)

Expand All @@ -869,15 +873,18 @@ def _add_timedeltalike_scalar(self, other):
-------
PeriodArray
"""
if not isinstance(self.freq, Tick):
if not isinstance(self.freq, (Tick, Day)):
# We cannot add timedelta-like to non-tick PeriodArray
raise raise_on_incompatible(self, other)

if isna(other):
# i.e. np.timedelta64("NaT")
return super()._add_timedeltalike_scalar(other)

td = np.asarray(Timedelta(other).asm8)
if isinstance(other, Day):
td = np.asarray(Timedelta(days=other.n).asm8)
else:
td = np.asarray(Timedelta(other).asm8)
return self._add_timedelta_arraylike(td)

def _add_timedelta_arraylike(
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
tslibs,
)
from pandas._libs.tslibs import (
Day,
NaT,
NaTType,
Tick,
Expand Down Expand Up @@ -256,6 +257,12 @@ def _from_sequence_not_strict(

assert unit not in ["Y", "y", "M"] # caller is responsible for checking

if isinstance(freq, Day):
raise ValueError(
"Day offset object is not valid for TimedeltaIndex, "
"pass e.g. 24H instead."
)

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)

if dtype is not None:
Expand All @@ -274,6 +281,9 @@ def _generate_range(
if freq is None and any(x is None for x in [periods, start, end]):
raise ValueError("Must provide freq argument if no data is supplied")

if isinstance(freq, Day):
raise TypeError("TimedeltaArray/Index freq must be a Tick or None")

if com.count_not_none(start, end, periods, freq) != 3:
raise ValueError(
"Of the four parameters: start, end, periods, "
Expand Down
Loading