Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP/API: Make Day not a Tick #51874

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
3297047
API: Make Day not a Tick
jbrockmendel Mar 9, 2023
afa935d
CLN
jbrockmendel Mar 9, 2023
a81ac61
fix remaining tests
jbrockmendel Mar 10, 2023
bd9a235
lint fixups
jbrockmendel Mar 10, 2023
eee39ad
remove extra to_hours
jbrockmendel Mar 10, 2023
9d346ed
simplify
jbrockmendel Mar 10, 2023
5d905bd
revert accidental
jbrockmendel Mar 10, 2023
1b2f374
Merge branch 'main' into daydst
jbrockmendel Mar 10, 2023
004869d
mypy, docs fixups, cleanups
jbrockmendel Mar 11, 2023
5fc076b
Merge branch 'main' into daydst
jbrockmendel Mar 11, 2023
2c9ffca
remove unnecessary check
jbrockmendel Mar 11, 2023
a145903
Merge branch 'main' into daydst
jbrockmendel Mar 11, 2023
9a8a513
Merge branch 'main' into daydst
jbrockmendel Mar 11, 2023
d4afeec
Merge branch 'main' into daydst
jbrockmendel Mar 11, 2023
0fbf296
Merge branch 'main' into daydst
jbrockmendel Mar 11, 2023
d1851cf
CLN: unnecessary pass
jbrockmendel Mar 11, 2023
696d6f7
Merge branch 'main' into daydst
jbrockmendel Mar 13, 2023
06e3a04
Merge branch 'main' into daydst
jbrockmendel Mar 13, 2023
8e89064
Fix doctest
jbrockmendel Mar 13, 2023
e368b51
Merge branch 'main' into daydst
jbrockmendel Mar 14, 2023
5d4618c
Merge branch 'main' into daydst
jbrockmendel Mar 15, 2023
cc983ae
Merge branch 'main' into daydst
jbrockmendel Mar 16, 2023
69e654b
mypy fixup
jbrockmendel Mar 16, 2023
b390372
Merge branch 'main' into daydst
jbrockmendel Mar 17, 2023
976d84b
troubleshoot docstring validation
jbrockmendel Mar 17, 2023
3089794
Merge branch 'main' into daydst
jbrockmendel Mar 29, 2023
ff2da5e
Merge branch 'main' into daydst
jbrockmendel Apr 3, 2023
a2bf919
Merge branch 'main' into daydst
jbrockmendel Apr 21, 2023
0e14c0a
Merge branch 'main' into daydst
jbrockmendel Apr 28, 2023
0241f22
Merge branch 'main' into daydst
jbrockmendel May 5, 2023
37b78ea
Merge branch 'main' into daydst
jbrockmendel Jul 18, 2023
d4cb05e
Merge branch 'main' into daydst
jbrockmendel Jul 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/reference/offset_frequency.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,6 @@ Properties
.. autosummary::
:toctree: api/

Day.delta
Day.freqstr
Day.kwds
Day.name
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/timedeltas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Further, operations among the scalars yield another scalar ``Timedelta``.

.. ipython:: python

pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta(
pd.Timedelta(pd.offsets.Hour(48)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta(
"00:00:00.000123"
)

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"is_supported_unit",
"npy_unit_to_abbrev",
"get_supported_reso",
"Day",
]

from pandas._libs.tslibs import dtypes
Expand Down Expand Up @@ -60,6 +61,7 @@
)
from pandas._libs.tslibs.offsets import (
BaseOffset,
Day,
Tick,
to_offset,
)
Expand Down
6 changes: 5 additions & 1 deletion pandas/_libs/tslibs/offsets.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class BaseOffset:
@property
def nanos(self) -> int: ...
def is_anchored(self) -> bool: ...
def _maybe_to_hours(self) -> BaseOffset: ...

def _get_offset(name: str) -> BaseOffset: ...

Expand All @@ -116,10 +117,13 @@ class Tick(SingleConstructorOffset):
def delta(self) -> Timedelta: ...
@property
def nanos(self) -> int: ...
def _maybe_to_hours(self) -> Tick: ...

def delta_to_tick(delta: timedelta) -> Tick: ...

class Day(Tick): ...
class Day(SingleConstructorOffset):
    # Day is implemented as a SingleConstructorOffset subclass, not a Tick, so
    # the stub must not inherit Tick-only members such as ``delta``; otherwise
    # type checkers accept attribute access and isinstance narrowing that do
    # not hold at runtime.
    def _maybe_to_hours(self) -> Hour: ...

class Hour(Tick): ...
class Minute(Tick): ...
class Second(Tick): ...
Expand Down
66 changes: 51 additions & 15 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,11 @@ cdef class BaseOffset:
def nanos(self):
raise ValueError(f"{self} is a non-fixed frequency")

def _maybe_to_hours(self):
    """
    Convert a Day offset to the Hour offset with ``24 * n`` hours.

    Returns
    -------
    BaseOffset
        ``Hour(self.n * 24)`` when ``self`` is a ``Day``; otherwise ``self``
        is returned unchanged.
    """
    if isinstance(self, Day):
        return Hour(self.n * 24)
    return self

def is_anchored(self) -> bool:
# TODO: Does this make sense for the general case? It would help
# if there were a canonical docstring for what is_anchored means.
Expand Down Expand Up @@ -929,8 +934,6 @@ cdef class Tick(SingleConstructorOffset):
# Note: Without making this cpdef, we get AttributeError when calling
# from __mul__
cpdef Tick _next_higher_resolution(Tick self):
if type(self) is Day:
return Hour(self.n * 24)
if type(self) is Hour:
return Minute(self.n * 60)
if type(self) is Minute:
Expand Down Expand Up @@ -1089,13 +1092,43 @@ cdef class Tick(SingleConstructorOffset):
self.normalize = False


cdef class Day(Tick):
_nanos_inc = 24 * 3600 * 1_000_000_000
cdef class Day(SingleConstructorOffset):
    """
    Offset representing ``n`` days.

    Parameters
    ----------
    n : int, default 1
        Number of days; forwarded to ``BaseOffset.__init__``.
    normalize : bool, default False
        Must be falsy. A truthy value raises ``ValueError``.
    """

    # NOTE(review): presumably read by the apply wrappers when handling
    # tz-aware inputs -- confirm against apply_wraps.
    _adjust_dst = True
    _attributes = tuple(["n", "normalize"])
    rule_code = "D"  # used by parse_time_string
    _prefix = "D"
    _td64_unit = "D"
    _period_dtype_code = PeriodDtypeCode.D
    _creso = NPY_DATETIMEUNIT.NPY_FR_D

    def __init__(self, n=1, normalize=False):
        """Initialise via ``BaseOffset.__init__``, then reject ``normalize``."""
        BaseOffset.__init__(self, n)
        if normalize:
            # GH#21427
            raise ValueError("Day offset with `normalize=True` are not allowed.")

    def is_on_offset(self, dt) -> bool:
        """Return True unconditionally; ``dt`` is not inspected."""
        return True

    @apply_wraps
    def _apply(self, other):
        """
        Add this offset to ``other``.

        Two ``Day`` offsets combine into a single ``Day`` whose ``n`` is the
        sum; anything else has ``numpy.timedelta64(n, "D")`` added to it.
        """
        if not isinstance(other, Day):
            return other + np.timedelta64(self.n, "D")
        # TODO: why isn't this handled in __add__?
        return Day(self.n + other.n)

    @apply_array_wraps
    def _apply_array(self, dtarr):
        """Return ``dtarr`` shifted by ``numpy.timedelta64(n, "D")``."""
        shift = np.timedelta64(self.n, "D")
        return dtarr + shift

    @cache_readonly
    def freqstr(self) -> str:
        """Return ``"D"`` when ``n == 1``, otherwise ``n`` followed by ``"D"``."""
        count = self.n
        return "D" if count == 1 else f"{count}D"


cdef class Hour(Tick):
_nanos_inc = 3600 * 1_000_000_000
Expand Down Expand Up @@ -1148,16 +1181,13 @@ cdef class Nano(Tick):
def delta_to_tick(delta: timedelta) -> Tick:
if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0:
# nanoseconds only for pd.Timedelta
if delta.seconds == 0:
return Day(delta.days)
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(seconds / 3600)
elif seconds % 60 == 0:
return Minute(seconds / 60)
else:
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(seconds / 3600)
elif seconds % 60 == 0:
return Minute(seconds / 60)
else:
return Second(seconds)
return Second(seconds)
else:
nanos = delta_to_nanoseconds(delta)
if nanos % 1_000_000 == 0:
Expand Down Expand Up @@ -4126,7 +4156,7 @@ cpdef to_offset(freq):
<2 * BusinessDays>

>>> to_offset(pd.Timedelta(days=1))
<Day>
<24 * Hours>

>>> to_offset(pd.offsets.Hour())
<Hour>
Expand Down Expand Up @@ -4165,7 +4195,7 @@ cpdef to_offset(freq):
if not stride:
stride = 1

if prefix in {"D", "H", "T", "S", "L", "U", "N"}:
if prefix in {"H", "T", "S", "L", "U", "N"}:
# For these prefixes, we have something like "3H" or
# "2.5T", so we can construct a Timedelta with the
# matching unit and get our offset from delta_to_tick
Expand All @@ -4183,6 +4213,12 @@ cpdef to_offset(freq):

if delta is None:
delta = offset
elif isinstance(delta, Day) and isinstance(offset, Tick):
# e.g. "1D1H" is treated like "25H"
delta = Hour(delta.n * 24) + offset
elif isinstance(offset, Day) and isinstance(delta, Tick):
# e.g. "1H1D" is treated like "25H"
delta = delta + Hour(offset.n * 24)
else:
delta = delta + offset
except (ValueError, TypeError) as err:
Expand Down
13 changes: 10 additions & 3 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,10 @@ from pandas._libs.tslibs.offsets cimport (
to_offset,
)

from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG
from pandas._libs.tslibs.offsets import (
INVALID_FREQ_ERR_MSG,
Day,
)

cdef:
enum:
Expand Down Expand Up @@ -1736,7 +1739,7 @@ cdef class _Period(PeriodMixin):
cdef:
int64_t inc

if not is_tick_object(self.freq):
if not is_tick_object(self.freq) and not isinstance(self.freq, Day):
raise IncompatibleFrequency("Input cannot be converted to "
f"Period(freq={self.freqstr})")

Expand All @@ -1747,6 +1750,10 @@ cdef class _Period(PeriodMixin):
# i.e. np.timedelta64("nat")
return NaT

if isinstance(other, Day):
# Periods are timezone-naive, so we treat Day as Tick-like
other = np.timedelta64(other.n, "D")

try:
inc = delta_to_nanoseconds(other, reso=self.freq._creso, round_ok=False)
except ValueError as err:
Expand Down Expand Up @@ -1774,7 +1781,7 @@ cdef class _Period(PeriodMixin):
return NaT
return other.__add__(self)

if is_any_td_scalar(other):
if is_any_td_scalar(other) or isinstance(other, Day):
return self._add_timedeltalike_scalar(other)
elif is_offset_object(other):
return self._add_offset(other)
Expand Down
8 changes: 6 additions & 2 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1710,6 +1710,7 @@ class Timedelta(_Timedelta):
) * 1_000_000_000
)

# TODO: catch OverflowError and re-raise as OutOfBoundsTimedelta
value = np.timedelta64(
int(kwargs.get("nanoseconds", 0))
+ int(kwargs.get("microseconds", 0) * 1_000)
Expand Down Expand Up @@ -1824,15 +1825,18 @@ class Timedelta(_Timedelta):

from pandas._libs.tslibs.offsets import to_offset

to_offset(freq).nanos # raises on non-fixed freq
orig = freq
# In this context it is sufficiently clear that "D" means 24H
freq = to_offset(freq)._maybe_to_hours()
freq.nanos # raises on non-fixed freq
unit = delta_to_nanoseconds(to_offset(freq), self._creso)

arr = np.array([self._value], dtype="i8")
try:
result = round_nsint64(arr, mode, unit)[0]
except OverflowError as err:
raise OutOfBoundsTimedelta(
f"Cannot round {self} to freq={freq} without overflow"
f"Cannot round {self} to freq={orig} without overflow"
) from err
return Timedelta._from_value_and_reso(result, self._creso)

Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,8 @@ class Timestamp(_Timestamp):
cdef:
int64_t nanos

freq = to_offset(freq)
# In this context it is sufficiently clear that "D" means 24H
freq = to_offset(freq)._maybe_to_hours()
freq.nanos # raises on non-fixed freq
nanos = delta_to_nanoseconds(freq, self._creso)
if nanos == 0:
Expand Down
34 changes: 29 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import (
BaseOffset,
Day,
IncompatibleFrequency,
NaT,
NaTType,
Expand Down Expand Up @@ -889,9 +890,16 @@ def inferred_freq(self) -> str | None:
if self.ndim != 1:
return None
try:
return frequencies.infer_freq(self)
res = frequencies.infer_freq(self)
except ValueError:
return None
if self.dtype.kind == "m" and res is not None and res.endswith("D"):
# TimedeltaArray freq must be a Tick, so we convert the inferred
# daily freq to hourly.
if res == "D":
return "24H"
res = str(int(res[:-1]) * 24) + "H"
return res

@property # NB: override with cache_readonly in immutable subclasses
def _resolution_obj(self) -> Resolution | None:
Expand Down Expand Up @@ -1028,6 +1036,10 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
elif isinstance(self.freq, Tick):
# In these cases
return self.freq
elif isinstance(self.freq, Day) and getattr(self, "tz", None) is None:
return self.freq
# TODO: are there tzaware cases when we can reliably preserve freq?
# We have a bunch of tests that seem to think so
return None

@final
Expand Down Expand Up @@ -1125,6 +1137,9 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
res_m8 = res_values.view(f"timedelta64[{self.unit}]")

new_freq = self._get_arithmetic_result_freq(other)
if new_freq is not None:
# TODO: are we sure this is right?
new_freq = new_freq._maybe_to_hours()
return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)

@final
Expand Down Expand Up @@ -1850,9 +1865,13 @@ def __init__(
if copy:
values = values.copy()
if freq:
if values.dtype.kind == "m" and isinstance(freq, Day):
raise TypeError("TimedeltaArray freq must be a Tick or None")
freq = to_offset(freq)
if values.dtype.kind == "m" and not isinstance(freq, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")
if values.dtype.kind == "m":
freq = freq._maybe_to_hours()
if not isinstance(freq, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")

NDArrayBacked.__init__(self, values=values, dtype=dtype)
self._freq = freq
Expand Down Expand Up @@ -1885,7 +1904,7 @@ def freq(self, value) -> None:
self._freq = value

@classmethod
def _validate_frequency(cls, index, freq, **kwargs):
def _validate_frequency(cls, index, freq: BaseOffset, **kwargs):
"""
Validate that a frequency is compatible with the values of a given
Datetime Array/Index or Timedelta Array/Index
Expand Down Expand Up @@ -2002,7 +2021,9 @@ def _round(self, freq, mode, ambiguous, nonexistent):

values = self.view("i8")
values = cast(np.ndarray, values)
nanos = to_offset(freq).nanos # raises on non-fixed frequencies
# In this context it is clear that "D" means "24H"
freq = to_offset(freq)._maybe_to_hours()
nanos = freq.nanos # raises on non-fixed frequencies
nanos = delta_to_nanoseconds(to_offset(freq), self._creso)
result_i8 = round_nsint64(values, mode, nanos)
result = self._maybe_mask_results(result_i8, fill_value=iNaT)
Expand Down Expand Up @@ -2079,6 +2100,9 @@ def _with_freq(self, freq):
assert freq == "infer"
freq = to_offset(self.inferred_freq)

if self.dtype.kind == "m" and freq is not None:
assert isinstance(freq, Tick)

arr = self.view()
arr._freq = freq
return arr
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,10 @@ def _generate_range( # type: ignore[override]
if end is not None:
end = end.tz_localize(None)

if isinstance(freq, Tick):
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
if isinstance(freq, Tick) or (tz is None and isinstance(freq, Day)):
i8values = generate_regular_range(
start, end, periods, freq._maybe_to_hours(), unit=unit
)
else:
xdr = _generate_range(
start=start, end=end, periods=periods, offset=freq, unit=unit
Expand Down
Loading