Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: de-duplicate DST tzconversion code #35077

Closed
wants to merge 46 commits into from
Closed
Show file tree
Hide file tree
Changes from 41 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
4b8c4fb
REF: implement TZConvertInfo
jbrockmendel Jun 30, 2020
d8bffdd
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jun 30, 2020
fc1ad75
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jun 30, 2020
998341e
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
8114413
setup_cache->setup
jbrockmendel Jul 1, 2020
907f9c4
revert
jbrockmendel Jul 1, 2020
addf931
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
b2154d4
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
9bcccc0
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
bb7e60d
ensure initialized
jbrockmendel Jul 1, 2020
407f266
ensure initialized
jbrockmendel Jul 1, 2020
5adca21
debuggina ssertions
jbrockmendel Jul 1, 2020
69bfb80
debuggina ssertions
jbrockmendel Jul 1, 2020
13244c8
debuggina ssertions
jbrockmendel Jul 1, 2020
c0f8b34
debuggina ssertions
jbrockmendel Jul 1, 2020
8620905
debuggina ssertions
jbrockmendel Jul 1, 2020
3605695
debuggina ssertions
jbrockmendel Jul 1, 2020
a0eb787
debuggina ssertions
jbrockmendel Jul 1, 2020
c8fcc19
debuggina ssertions
jbrockmendel Jul 1, 2020
db71af5
debuggina ssertions
jbrockmendel Jul 1, 2020
e47e490
debuggina ssertions
jbrockmendel Jul 1, 2020
7f8c717
debuggina ssertions
jbrockmendel Jul 1, 2020
e21cd6a
debuggina ssertions
jbrockmendel Jul 1, 2020
9a47096
debuggina ssertions
jbrockmendel Jul 1, 2020
d6dce1a
debuggina ssertions
jbrockmendel Jul 1, 2020
0dbd8ac
debuggina ssertions
jbrockmendel Jul 1, 2020
f9514b4
debuggina ssertions
jbrockmendel Jul 2, 2020
e198dbd
debuggina ssertions
jbrockmendel Jul 2, 2020
681f5b8
debuggina ssertions
jbrockmendel Jul 2, 2020
e93b961
debuggina ssertions
jbrockmendel Jul 2, 2020
52af5e1
debuggina ssertions
jbrockmendel Jul 2, 2020
26d0d3a
debuggina ssertions
jbrockmendel Jul 2, 2020
3b220fb
debuggina ssertions
jbrockmendel Jul 2, 2020
3c1bf60
debuggina ssertions
jbrockmendel Jul 2, 2020
a70ce3f
debuggina ssertions
jbrockmendel Jul 2, 2020
2478ec3
debuggina ssertions
jbrockmendel Jul 2, 2020
21cbfc2
debuggina ssertions
jbrockmendel Jul 2, 2020
2aa256a
debuggina ssertions
jbrockmendel Jul 2, 2020
7669dc2
debuggina ssertions
jbrockmendel Jul 2, 2020
2bfb9df
debuggina ssertions
jbrockmendel Jul 2, 2020
3dd2957
CLN
jbrockmendel Jul 2, 2020
b31c40e
debugging assertions
jbrockmendel Jul 2, 2020
8b2e9a3
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 7, 2020
e5b73c7
Implement TZ
jbrockmendel Jul 7, 2020
184e188
implement as cdef class
jbrockmendel Jul 7, 2020
b974ec7
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions asv_bench/benchmarks/tslibs/resolution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
ipython analogue:

tr = TimeResolution()
mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],))
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])

for unit in tr.params[0]:
    for size in tr.params[1]:
        for tz in tr.params[2]:
            tr.setup(unit, size, tz)
            key = (unit, size, str(tz))
            print(key)

            val = %timeit -o tr.time_get_resolution(unit, size, tz)

            df.loc[key] = (val.average, val.stdev)

"""
from datetime import timedelta, timezone

from dateutil.tz import gettz, tzlocal
import numpy as np
import pytz

from pandas._libs.tslibs.resolution import get_resolution


class TimeResolution:
    """
    asv benchmark for ``get_resolution`` over a grid of input parameters.

    The grid is the product of the datetime64 unit used to build the data,
    the number of timestamps, and the timezone passed to ``get_resolution``.
    """

    params = (
        ["D", "h", "m", "s", "us", "ns"],
        [1, 100, 10 ** 4, 10 ** 6],
        [
            None,
            timezone.utc,
            timezone(timedelta(minutes=60)),
            pytz.timezone("US/Pacific"),
            gettz("Asia/Tokyo"),
            tzlocal(),
        ],
    )
    param_names = ["unit", "size", "tz"]

    def setup(self, unit, size, tz):
        """Build ``size`` random int64 nanosecond stamps from ``unit`` steps."""
        # Small integers in [0, 10), interpreted as counts of ``unit``.
        counts = np.random.randint(0, 10, size=size, dtype="i8")
        as_unit = counts.view(f"M8[{unit}]")
        # Normalise to nanoseconds, then expose the raw int64 payload that
        # get_resolution consumes.
        as_nanos = as_unit.astype("M8[ns]")
        self.i8data = as_nanos.view("i8")

    def time_get_resolution(self, unit, size, tz):
        """Timed call; ``unit`` and ``size`` only influence ``setup``."""
        get_resolution(self.i8data, tz)
80 changes: 35 additions & 45 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from cpython.datetime cimport tzinfo

import numpy as np
from numpy cimport ndarray, int64_t, int32_t
from numpy cimport ndarray, int64_t, int32_t, intp_t

from pandas._libs.tslibs.util cimport get_nat

from pandas._libs.tslibs.dtypes import Resolution
from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct, dt64_to_dtstruct)
from pandas._libs.tslibs.timezones cimport (
is_utc, is_tzlocal, get_dst_info)
from pandas._libs.tslibs.timezones cimport get_tzconverter, TZConvertInfo
from pandas._libs.tslibs.ccalendar cimport get_days_in_month
from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal

Expand Down Expand Up @@ -39,51 +38,42 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None):
Py_ssize_t i, n = len(stamps)
npy_datetimestruct dts
int reso = RESO_DAY, curr_reso
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t[:] pos
int64_t local_val, delta

if is_utc(tz) or tz is None:
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i], &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
elif is_tzlocal(tz):
int64_t local_val
TZConvertInfo info
ndarray[intp_t, ndim=1] pos2

info = get_tzconverter(tz, stamps)

if info.use_fixed:
assert info.delta != NPY_NAT
elif not info.use_utc and not info.use_tzlocal:
assert info.utcoffsets is not NULL
assert info.positions is not NULL
pos2 = np.array(<intp_t[:n]>info.positions, dtype=np.intp)
WillAyd marked this conversation as resolved.
Show resolved Hide resolved
for i in range(n):
if stamps[i] == NPY_NAT:
continue
v1 = info.positions[i]
v2 = pos2[i]
assert v1 == v2, (v1, v2)
assert v1 < info.noffsets, (v1, info.noffsets, i, stamps[i])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @WillAyd I could use a fresh pair of eyes on this. This debugging assertion is failing on Linux and I'm at a loss as to why. The assertion on L265 of tzconversion should behave the same as this one, but that one is passing. Any idea why things could change between there and here?

assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets)

for i in range(n):
if stamps[i] == NPY_NAT:
continue

if info.use_utc:
local_val = stamps[i]
elif info.use_tzlocal:
local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
dt64_to_dtstruct(local_val, &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(delta) == 1
delta = deltas[0]
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i] + delta, &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
elif info.use_fixed:
local_val = stamps[i] + info.delta
else:
pos = trans.searchsorted(stamps, side='right') - 1
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
local_val = stamps[i] + info.utcoffsets[info.positions[i]]

dt64_to_dtstruct(local_val, &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso

return Resolution(reso)

Expand Down
14 changes: 14 additions & 0 deletions pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from cpython.datetime cimport tzinfo

from numpy cimport int64_t, intp_t, ndarray

cdef tzinfo utc_pytz

cpdef bint is_utc(tzinfo tz)
Expand All @@ -15,3 +17,15 @@ cdef get_utcoffset(tzinfo tz, obj)
cdef bint is_fixed_offset(tzinfo tz)

cdef object get_dst_info(tzinfo tz)


# Plain C struct describing how to convert UTC i8 stamps to local wall
# time for one timezone.  It is filled in by get_tzconverter, which sets
# at most one of the three use_* flags.
ctypedef struct TZConvertInfo:
# True when tz is None or UTC.
bint use_utc
# True when tz is a tzlocal.
bint use_tzlocal
# True for a fixed-offset tz; ``delta`` then holds the single offset.
bint use_fixed
# Raw pointer to the offsets array returned by get_dst_info; NULL unless
# none of the use_* flags is set.
# NOTE(review): the struct holds no reference to the owning ndarray.
int64_t* utcoffsets
# Raw pointer to per-value indices into ``utcoffsets``; NULL unless none
# of the use_* flags is set.
# NOTE(review): the struct holds no reference to the owning ndarray.
intp_t* positions
# Offset added to every stamp when use_fixed is set; NPY_NAT placeholder
# otherwise.
int64_t delta
# Number of entries in the offsets array (0 for the UTC/tzlocal cases).
int noffsets

cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values)
60 changes: 57 additions & 3 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ UTC = pytz.utc

import numpy as np
cimport numpy as cnp
from numpy cimport int64_t
from numpy cimport int64_t, intp_t
cnp.import_array()

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -195,10 +195,10 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
return new_trans


cdef int64_t[:] unbox_utcoffsets(object transinfo):
cdef ndarray[int64_t, ndim=1] unbox_utcoffsets(object transinfo):
cdef:
Py_ssize_t i, sz
int64_t[:] arr
ndarray[int64_t, ndim=1] arr

sz = len(transinfo)
arr = np.empty(sz, dtype='i8')
Expand All @@ -212,6 +212,60 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo):
# ----------------------------------------------------------------------
# Daylight Savings

# Plain C struct describing how to convert UTC i8 stamps to local wall
# time for one timezone.  get_tzconverter fills it in and sets at most one
# of the three use_* flags.
# NOTE(review): the diff also adds an identical declaration to
# timezones.pxd -- confirm that defining the struct in both places is
# intended.
ctypedef struct TZConvertInfo:
# True when tz is None or UTC.
bint use_utc
# True when tz is a tzlocal.
bint use_tzlocal
# True for a fixed-offset tz; ``delta`` then holds the single offset.
bint use_fixed
# Raw pointer to the offsets array returned by get_dst_info; NULL unless
# none of the use_* flags is set.
# NOTE(review): the struct holds no reference to the owning ndarray.
int64_t* utcoffsets
# Raw pointer to per-value indices into ``utcoffsets``; NULL unless none
# of the use_* flags is set.
# NOTE(review): the struct holds no reference to the owning ndarray.
intp_t* positions
# Offset added to every stamp when use_fixed is set; NPY_NAT placeholder
# otherwise.
int64_t delta
# Number of entries in the offsets array (0 for the UTC/tzlocal cases).
int noffsets


cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values):
"""
Classify ``tz`` and precompute what is needed to localize ``values``.

Parameters
----------
tz : tzinfo or None
values : const int64_t[:]
    i8 stamps that are looked up in the transition times of ``tz``.

Returns
-------
TZConvertInfo
    Exactly one of use_utc / use_tzlocal / use_fixed is set, or none of
    them, in which case ``utcoffsets`` and ``positions`` are populated
    and the local value for element i is
    ``values[i] + utcoffsets[positions[i]]``.

Notes
-----
NOTE(review): ``positions`` points into the buffer of the local array
``pos``; nothing in this function keeps ``pos`` alive after ``return``,
so the pointer appears to dangle once the caller receives the struct.
Whether ``utcoffsets`` stays valid depends on get_dst_info retaining
``deltas`` -- confirm.
"""
cdef:
TZConvertInfo info
ndarray[int64_t, ndim=1] deltas, trans
ndarray[intp_t, ndim=1] pos
str typ
Py_ssize_t n = len(values)

# Start from a fully-initialized "nothing selected" state so callers can
# test the flags / NULL pointers safely.
info.use_utc = info.use_tzlocal = info.use_fixed = False
info.delta = NPY_NAT # placeholder
info.utcoffsets = NULL
info.positions = NULL
info.noffsets = 0

if tz is None or is_utc(tz):
info.use_utc = True
elif is_tzlocal(tz):
info.use_tzlocal = True
else:
trans, deltas, typ = get_dst_info(tz)
info.noffsets = len(deltas)
if typ not in ["pytz", "dateutil"]:
# Fixed Offset
info.use_fixed = True
info.delta = deltas[0]
else:
# Borrow the data pointer of ``deltas`` without taking a reference.
info.utcoffsets = <int64_t*>cnp.PyArray_DATA(deltas)
# Index of the last transition at or before each value.
# NOTE(review): a value sorting before trans[0] would give -1 here;
# the assertions below only check the upper bound -- confirm.
pos = trans.searchsorted(values, side="right") - 1
# Debugging assertions: the raw-pointer reads below require a
# contiguous buffer.
assert pos.flags["F_CONTIGUOUS"]
assert pos.flags["C_CONTIGUOUS"]

# Debugging assertions: every position must index into ``deltas``.
assert (pos.max() < info.noffsets), (pos.max(), info.noffsets)
assert (pos < info.noffsets).all(), (max(pos), info.noffsets)
# NOTE(review): borrowed pointer into the local ``pos``; see Notes.
info.positions = <intp_t*>cnp.PyArray_DATA(pos)

# Debugging round-trip: rebuild an array from the raw pointer and re-check
# the bound while ``pos`` is still alive.
pos2 = np.array(<intp_t[:n]>info.positions, dtype=np.intp)
assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets)

# Same check element by element through the raw pointer.
for i in range(n):
p = info.positions[i]
assert p < info.noffsets, (p, info.noffsets)

return info


cdef object get_dst_info(tzinfo tz):
"""
Expand Down