Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/API: implement DayDST #44364

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
24fa858
BUG: freq inference for tz-aware DatetimeIndex
jbrockmendel Nov 12, 2020
7865c23
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Nov 12, 2020
bfd26b8
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Nov 20, 2020
74fb970
Merge branch 'master' into bug-infer_freq
jbrockmendel Feb 10, 2021
0de9228
merge so i can merge, even if not passing
jbrockmendel Jun 9, 2021
555ad03
Merge branch 'master' into bug-infer_freq
jbrockmendel Jun 9, 2021
b5b5357
Merge branch 'master' into bug-infer_freq
jbrockmendel Jun 9, 2021
a9fc8d1
Merge branch 'master' into bug-infer_freq
jbrockmendel Jun 10, 2021
9a43e97
Merge branch 'master' into bug-infer_freq
jbrockmendel Jun 11, 2021
8ef2422
Merge branch 'master' into bug-infer_freq
jbrockmendel Jun 24, 2021
b6d1071
Merge branch 'master' into bug-infer_freq
jbrockmendel Sep 30, 2021
832cdd2
Merge branch 'master' into bug-infer_freq
jbrockmendel Oct 19, 2021
fbd8995
Merge branch 'master' into bug-infer_freq
jbrockmendel Oct 29, 2021
68605d1
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 4, 2021
e5a2b07
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 6, 2021
2330a95
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 6, 2021
1bdc7e0
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 8, 2021
7c5ae28
Implement DayDST
jbrockmendel Nov 9, 2021
4cdcbac
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 9, 2021
e743545
interpret D depending on tz
jbrockmendel Nov 9, 2021
27e0a02
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 21, 2021
8cd2dd8
revert last
jbrockmendel Nov 21, 2021
838e519
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 23, 2021
bb54835
fix doctest
jbrockmendel Nov 23, 2021
e9c53e8
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 28, 2021
18468bb
Merge branch 'master' into bug-infer_freq
jbrockmendel Nov 29, 2021
a95ea87
lint fixup
jbrockmendel Nov 29, 2021
17cbffe
Merge branch 'master' into bug-infer_freq
jbrockmendel Dec 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"to_offset",
"Tick",
"BaseOffset",
"DayDST",
"tz_compare",
]

Expand All @@ -41,6 +42,7 @@
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.offsets import (
BaseOffset,
DayDST,
Tick,
to_offset,
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/offsets.pxd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from numpy cimport int64_t


cpdef to_offset(object obj)
cpdef to_offset(object obj, bint tzaware=*)
cdef bint is_offset_object(object obj)
cdef bint is_tick_object(object obj)

Expand Down
49 changes: 48 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,41 @@ def delta_to_tick(delta: timedelta) -> Tick:
return Nano(nanos)


cdef class DayDST(SingleConstructorOffset):
    """
    Day-like offset with ``freqstr`` "DayDST" (or "<n>DayDST").

    Unlike the Tick-based ``Day``, this class derives from
    ``SingleConstructorOffset``. ``to_offset`` returns it for strings ending
    in "DayDST", and in place of ``Day`` when called with ``tzaware=True``.

    Parameters
    ----------
    n : int, default 1
        Number of days the offset represents.
    normalize : bool, default False
        Must be falsy; a truthy value raises ``ValueError``.

    Raises
    ------
    ValueError
        If ``normalize`` is truthy.
    """
    # NOTE(review): presumably read by the apply_* wrappers to preserve
    # wall-clock time across DST transitions -- confirm against apply_wraps.
    _adjust_dst = True
    _attributes = tuple(["n", "normalize"])
    rule_code = "D"  # used by parse_time_string

    def __init__(self, n=1, normalize=False):
        BaseOffset.__init__(self, n)
        if normalize:
            # GH#21427
            # This class is not a Tick, so the message names DayDST rather
            # than reusing the Tick wording.
            raise ValueError(
                "DayDST offset with `normalize=True` is not allowed."
            )

    def is_on_offset(self, dt) -> bool:
        # Every input is reported as on-offset.
        return True

    @apply_wraps
    def _apply(self, other):
        # Shift a scalar by ``n`` days.
        return other + Timedelta(days=self.n)

    @apply_index_wraps
    def apply_index(self, dti):
        # Index variant: delegates to the array implementation.
        return self._apply_array(dti)

    @apply_array_wraps
    def _apply_array(self, dtarr):
        # Shift every element of the array by ``n`` days.
        return dtarr + Timedelta(days=self.n)

    @cache_readonly
    def freqstr(self) -> str:
        # The multiplier is omitted only when n == 1, so e.g. n == -1
        # renders as "-1DayDST".
        if self.n != 1:
            return f"{self.n}DayDST"
        return "DayDST"


# --------------------------------------------------------------------

cdef class RelativeDeltaOffset(BaseOffset):
Expand Down Expand Up @@ -3569,14 +3604,16 @@ def _get_offset(name: str) -> BaseOffset:
return _offset_map[name]


cpdef to_offset(freq):
cpdef to_offset(freq, bint tzaware=False):
"""
Return DateOffset object from string or tuple representation
or datetime.timedelta object.

Parameters
----------
freq : str, tuple, datetime.timedelta, DateOffset or None
tzaware : bool, default False
If we have a string "D", whether to interpret that as DayDST.

Returns
-------
Expand Down Expand Up @@ -3629,6 +3666,14 @@ cpdef to_offset(freq):
delta = None
stride_sign = None

if freq.endswith("DayDST"):
head = freq[:-6]
if len(head):
n = int(head)
else:
n = 1
return DayDST(n)

try:
split = opattern.split(freq)
if split[-1] != "" and not split[-1].isspace():
Expand Down Expand Up @@ -3673,6 +3718,8 @@ cpdef to_offset(freq):
if delta is None:
raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))

if type(delta) is Day and tzaware:
return DayDST(delta.n)
return delta


Expand Down
8 changes: 7 additions & 1 deletion pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,11 @@ from pandas._libs.tslibs.offsets cimport (
to_offset,
)

from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG
from pandas._libs.tslibs.offsets import (
INVALID_FREQ_ERR_MSG,
Day,
DayDST,
)

cdef:
enum:
Expand Down Expand Up @@ -1629,6 +1633,8 @@ cdef class _Period(PeriodMixin):
freq = dtype.date_offset

freq = to_offset(freq)
if isinstance(freq, DayDST):
freq = Day(freq.n)

if freq.n <= 0:
raise ValueError("Frequency must be positive, because it "
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,7 @@ class Timedelta(_Timedelta):
ndarray[int64_t] arr

from pandas._libs.tslibs.offsets import to_offset
unit = to_offset(freq).nanos
unit = to_offset(freq, tzaware=False).nanos

arr = np.array([self.value], dtype="i8")
result = round_nsint64(arr, mode, unit)[0]
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from pandas._libs.tslibs import (
BaseOffset,
DayDST,
IncompatibleFrequency,
NaT,
NaTType,
Expand Down Expand Up @@ -1081,8 +1082,9 @@ def _add_timedeltalike_scalar(self, other):
new_values = new_values.view(self._ndarray.dtype)

new_freq = None
if isinstance(self.freq, Tick) or is_period_dtype(self.dtype):
if isinstance(self.freq, (Tick, DayDST)) or is_period_dtype(self.dtype):
# adding a scalar preserves freq
# TODO: make sure this is accurate for DayDST
new_freq = self.freq

# error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked"
Expand Down
19 changes: 17 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
from pandas.tseries.offsets import (
BDay,
Day,
DayDST,
Tick,
)

Expand Down Expand Up @@ -365,7 +366,18 @@ def _from_sequence_not_strict(
ambiguous=ambiguous,
)

freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
try:
freq, freq_infer = dtl.validate_inferred_freq(
freq, inferred_freq, freq_infer
)
except ValueError:
if isinstance(freq, Tick) and isinstance(inferred_freq, DayDST):
# It is possible that both could be valid, so we'll
# go through _validate_frequency below
inferred_freq = None
freq_infer = False
else:
raise
if explicit_none:
freq = None

Expand Down Expand Up @@ -432,10 +444,13 @@ def _generate_range(
end, end_tz, end, freq, tz, ambiguous, nonexistent
)
if freq is not None:
# FIXME: don't do this
# We break Day arithmetic (fixed 24 hour) here and opt for
# Day to mean calendar day (23/24/25 hour). Therefore, strip
# tz info from start and end to avoid DST arithmetic
if isinstance(freq, Day):
if isinstance(freq, (Day, DayDST)):
if tz is not None:
freq = DayDST(freq.n)
if start is not None:
start = start.tz_localize(None)
if end is not None:
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from pandas._libs.tslibs.dtypes import FreqGroup
from pandas._libs.tslibs.fields import isleapyear_arr
from pandas._libs.tslibs.offsets import (
Day,
DayDST,
Tick,
delta_to_tick,
)
Expand Down Expand Up @@ -1108,6 +1110,8 @@ def dt64arr_to_periodarr(data, freq, tz=None):
elif isinstance(data, (ABCIndex, ABCSeries)):
data = data._values

if isinstance(freq, DayDST):
freq = Day(freq.n)
freq = Period._maybe_convert_freq(freq)
base = freq._period_dtype_code
return c_dt64arr_to_periodarr(data.view("i8"), base, tz), freq
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,7 @@ def date_range(
DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00',
'2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00',
'2018-01-05 00:00:00+09:00'],
dtype='datetime64[ns, Asia/Tokyo]', freq='D')
dtype='datetime64[ns, Asia/Tokyo]', freq='DayDST')

`closed` controls whether to include `start` and `end` that are on the
boundary. The default includes boundary points on either end.
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_setitem_invalidates_datetime_index_freq(self):
# `freq` attribute on the underlying DatetimeIndex

dti = date_range("20130101", periods=3, tz="US/Eastern")
orig_freq = dti.freq
ts = dti[1]

df = DataFrame({"B": dti})
Expand All @@ -50,7 +51,7 @@ def test_setitem_invalidates_datetime_index_freq(self):
assert df["B"]._values.freq is None

# check that the DatetimeIndex was not altered in place
assert dti.freq == "D"
assert dti.freq is orig_freq
assert dti[1] == ts

def test_cast_internals(self, float_frame):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_getitem(self):
result = idx[4::-1]
expected = DatetimeIndex(
["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
freq="-1D",
freq=-1 * idx.freq,
tz=idx.tz,
name="idx",
)
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,10 @@ def test_setops_preserve_freq(self, tz):

result = rng[:50].intersection(rng[25:75])
assert result.name == rng.name
assert result.freqstr == "D"
if tz is None:
assert result.freqstr == "D"
else:
assert result.freqstr == "DayDST"
assert result.tz == rng.tz

nofreq = DatetimeIndex(list(rng[25:75]), name="other")
Expand Down
12 changes: 8 additions & 4 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,7 @@ def test_resample_origin_epoch_with_tz_day_vs_24h():

result_1 = ts_1.resample("D", origin="epoch").mean()
result_2 = ts_1.resample("24H", origin="epoch").mean()
tm.assert_series_equal(result_1, result_2)
tm.assert_series_equal(result_1, result_2, check_freq=False)

# check that we have the same behavior with epoch even if we are not timezone aware
ts_no_tz = ts_1.tz_localize(None)
Expand All @@ -897,7 +897,7 @@ def test_resample_origin_with_day_freq_on_dst():
# GH 31809
tz = "America/Chicago"

def _create_series(values, timestamps, freq="D"):
def _create_series(values, timestamps, freq="DayDST"):
return Series(
values,
index=DatetimeIndex(
Expand Down Expand Up @@ -1484,7 +1484,7 @@ def test_resample_dst_anchor():
dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz="US/Eastern")
df = DataFrame([5], index=dti)

dti = DatetimeIndex(df.index.normalize(), freq="D")
dti = DatetimeIndex(df.index.normalize(), freq="DayDST")
expected = DataFrame([5], index=dti)
tm.assert_frame_equal(df.resample(rule="D").sum(), expected)
df.resample(rule="MS").sum()
Expand Down Expand Up @@ -1618,7 +1618,11 @@ def test_downsample_dst_at_midnight():
dti = date_range("2018-11-03", periods=3).tz_localize(
"America/Havana", ambiguous=True
)
dti = DatetimeIndex(dti, freq="D")
with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst"):
# Check that we are requiring ambiguous be passed explicitly
dti = DatetimeIndex(dti, freq="D")
dti = DatetimeIndex(dti, freq="DayDST", ambiguous=True)

expected = DataFrame([7.5, 28.0, 44.5], index=dti)
tm.assert_frame_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_period_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_resample_with_pytz(self):
expected = Series(
2.0,
index=pd.DatetimeIndex(
["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D"
["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="DayDST"
),
)
tm.assert_series_equal(result, expected)
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/series/accessors/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,10 @@ def test_dt_namespace_accessor_datetime64tz(self):
tz_result = result.dt.tz
assert str(tz_result) == "CET"
freq_result = ser.dt.freq
assert freq_result == DatetimeIndex(ser.values, freq="infer").freq
assert (
freq_result
== DatetimeIndex(ser._values._with_freq(None), freq="infer").freq
)

def test_dt_namespace_accessor_timedelta(self):
# GH#7207, GH#11128
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,19 +323,20 @@ def test_setitem_invalidates_datetime_index_freq(self):
# `freq` attribute on the underlying DatetimeIndex

dti = date_range("20130101", periods=3, tz="US/Eastern")
orig_freq = dti.freq
ts = dti[1]
ser = Series(dti)
assert ser._values is not dti
assert ser._values._data.base is not dti._data._data.base
assert dti.freq == "D"
assert dti.freq is orig_freq
ser.iloc[1] = NaT
assert ser._values.freq is None

# check that the DatetimeIndex was not altered in place
assert ser._values is not dti
assert ser._values._data.base is not dti._data._data.base
assert dti[1] == ts
assert dti.freq == "D"
assert dti.freq is orig_freq

def test_dt64tz_setitem_does_not_mutate_dti(self):
# GH#21907, GH#24096
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/tseries/frequencies/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ def test_infer_freq_index(freq, expected):
def test_infer_freq_tz(tz_naive_fixture, expected, dates):
# see gh-7310
tz = tz_naive_fixture

if expected == "D" and tz is not None:
expected = "DayDST"

idx = DatetimeIndex(dates, tz=tz)
assert idx.inferred_freq == expected

Expand Down Expand Up @@ -380,6 +384,24 @@ def test_infer_freq_business_hour(data, expected):
assert idx.inferred_freq == expected


def test_infer_freq_across_dst_not_daily():
    """Daily tz-aware range should report and re-infer "DayDST" (GH#37295)."""
    # GH#37295
    zone = "Canada/Eastern"
    first = Timestamp("2019-03-26 00:00:00-0400", tz=zone)
    last = Timestamp("2020-10-17 00:00:00-0400", tz=zone)

    dti = date_range(start=first, end=last, freq="D")
    assert dti.freq == "DayDST"

    # Consecutive steps are not all the same length over this span.
    deltas = dti - dti.shift()
    assert not deltas.is_unique

    assert dti.inferred_freq == "DayDST"

    # Drop the stored freq and let the constructor infer it again.
    without_freq = dti._with_freq(None)
    dti2 = DatetimeIndex(without_freq, freq="infer")
    assert dti2.freq == "DayDST"


def test_not_monotonic():
rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
rng = rng[::-1]
Expand Down
Loading