Skip to content

Commit

Permalink
BUG: in Timestamp.replace when replacing tzinfo around DST changes (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback authored Sep 20, 2017
1 parent c5c3642 commit b59f107
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 21 deletions.
23 changes: 23 additions & 0 deletions asv_bench/benchmarks/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .pandas_vb_common import *
from pandas import to_timedelta, Timestamp
import pytz
import datetime


class TimestampProperties(object):
Expand Down Expand Up @@ -58,3 +60,24 @@ def time_is_leap_year(self):

def time_microsecond(self):
self.ts.microsecond


class TimestampOps(object):
    """Benchmarks timing ``Timestamp.replace`` called with ``tzinfo=...``."""

    goal_time = 0.2

    def setup(self):
        """Create the naive/aware timestamps and the tzinfo used below."""
        stamp = '2017-08-25 08:16:14'
        self.ts = Timestamp(stamp)
        self.ts_tz = Timestamp(stamp, tz='US/Eastern')

        # tzinfo obtained by localizing 2016-03-27 01:00 to CET with
        # is_dst=False; reused by time_replace_across_dst.
        naive = datetime.datetime(2016, 3, 27, 1)
        cet = pytz.timezone('CET')
        self.tzinfo = cet.localize(naive, is_dst=False).tzinfo
        self.ts2 = Timestamp(naive)

    def time_replace_tz(self):
        # naive timestamp -> attach a pytz timezone looked up by name
        eastern = pytz.timezone('US/Eastern')
        self.ts.replace(tzinfo=eastern)

    def time_replace_across_dst(self):
        # naive timestamp -> attach the CET tzinfo prepared in setup()
        self.ts2.replace(tzinfo=self.tzinfo)

    def time_replace_None(self):
        # tz-aware timestamp -> drop the tzinfo
        self.ts_tz.replace(tzinfo=None)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ Conversion
- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`)
- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`)
- Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`)
- Bug in ``Timestamp.replace`` where replacing ``tzinfo`` around a DST change gave a result that did not match ``datetime.replace`` (:issue:`15683`)

Indexing
^^^^^^^^
Expand Down
48 changes: 27 additions & 21 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False):

cdef:
Py_ssize_t i, n = len(arr)
ndarray[int64_t] trans, deltas
pandas_datetimestruct dts
object dt
int64_t value
Expand Down Expand Up @@ -417,8 +418,9 @@ class Timestamp(_Timestamp):

def _round(self, freq, rounder):

cdef int64_t unit
cdef object result, value
cdef:
int64_t unit, r, value, buff = 1000000
object result

from pandas.tseries.frequencies import to_offset
unit = to_offset(freq).nanos
Expand All @@ -429,16 +431,15 @@ class Timestamp(_Timestamp):
if unit < 1000 and unit % 1000 != 0:
# for nano rounding, work with the last 6 digits separately
# due to float precision
buff = 1000000
result = (buff * (value // buff) + unit *
(rounder((value % buff) / float(unit))).astype('i8'))
r = (buff * (value // buff) + unit *
(rounder((value % buff) / float(unit))).astype('i8'))
elif unit >= 1000 and unit % 1000 != 0:
msg = 'Precision will be lost using frequency: {}'
warnings.warn(msg.format(freq))
result = (unit * rounder(value / float(unit)).astype('i8'))
r = (unit * rounder(value / float(unit)).astype('i8'))
else:
result = (unit * rounder(value / float(unit)).astype('i8'))
result = Timestamp(result, unit='ns')
r = (unit * rounder(value / float(unit)).astype('i8'))
result = Timestamp(r, unit='ns')
if self.tz is not None:
result = result.tz_localize(self.tz)
return result
Expand Down Expand Up @@ -683,14 +684,16 @@ class Timestamp(_Timestamp):

cdef:
pandas_datetimestruct dts
int64_t value
int64_t value, value_tz, offset
object _tzinfo, result, k, v
datetime ts_input

# set to naive if needed
_tzinfo = self.tzinfo
value = self.value
if _tzinfo is not None:
value = tz_convert_single(value, 'UTC', _tzinfo)
value_tz = tz_convert_single(value, _tzinfo, 'UTC')
value += value - value_tz

# setup components
pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts)
Expand Down Expand Up @@ -724,16 +727,14 @@ class Timestamp(_Timestamp):
_tzinfo = tzinfo

# reconstruct & check bounds
value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
dts.sec, dts.us, tzinfo=_tzinfo)
ts = convert_to_tsobject(ts_input, _tzinfo, None, 0, 0)
value = ts.value + (dts.ps // 1000)
if value != NPY_NAT:
_check_dts_bounds(&dts)

# set tz if needed
if _tzinfo is not None:
value = tz_convert_single(value, _tzinfo, 'UTC')

result = create_timestamp_from_ts(value, dts, _tzinfo, self.freq)
return result
return create_timestamp_from_ts(value, dts, _tzinfo, self.freq)

def isoformat(self, sep='T'):
base = super(_Timestamp, self).isoformat(sep=sep)
Expand Down Expand Up @@ -1175,7 +1176,7 @@ cdef class _Timestamp(datetime):
return np.datetime64(self.value, 'ns')

def __add__(self, other):
cdef int64_t other_int
cdef int64_t other_int, nanos

if is_timedelta64_object(other):
other_int = other.astype('timedelta64[ns]').view('i8')
Expand Down Expand Up @@ -1625,6 +1626,10 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
"""
Take a TSObject in UTC and localizes to timezone tz.
"""
cdef:
ndarray[int64_t] trans, deltas
Py_ssize_t delta, posn

if is_utc(tz):
obj.tzinfo = tz
elif is_tzlocal(tz):
Expand Down Expand Up @@ -1676,7 +1681,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
obj.tzinfo = tz


def _localize_pydatetime(object dt, object tz):
cpdef inline object _localize_pydatetime(object dt, object tz):
"""
Take a datetime/Timestamp in UTC and localizes to timezone tz.
"""
Expand Down Expand Up @@ -3892,7 +3897,7 @@ for _maybe_method_name in dir(NaTType):
# Conversion routines


def _delta_to_nanoseconds(delta):
cpdef int64_t _delta_to_nanoseconds(delta):
if isinstance(delta, np.ndarray):
return delta.astype('m8[ns]').astype('int64')
if hasattr(delta, 'nanos'):
Expand Down Expand Up @@ -4137,7 +4142,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
return result


def tz_convert_single(int64_t val, object tz1, object tz2):
cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
"""
Convert the val (in i8) from timezone1 to timezone2
Expand Down Expand Up @@ -5006,6 +5011,7 @@ cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil:
def dates_normalized(ndarray[int64_t] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans, deltas
pandas_datetimestruct dts

if tz is None or is_utc(tz):
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/tseries/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,27 @@ def test_ambiguous_compat(self):
assert (result_pytz.to_pydatetime().tzname() ==
result_dateutil.to_pydatetime().tzname())

def test_replace_tzinfo(self):
    """Compare ``Timestamp.replace(tzinfo=...)`` with ``datetime.replace``.

    Covers attaching a pytz ``tzinfo`` to a naive value and then removing
    it again with ``tzinfo=None``.
    """
    # GH 15683

    def assert_matches(expected, actual):
        # ``timestamp`` is a new method in Py 3.3, so only compare it
        # when the stdlib datetime provides it.
        if hasattr(expected, 'timestamp'):
            assert expected.timestamp() == actual.timestamp()
        assert expected == actual
        assert expected == actual.to_pydatetime()

    naive = datetime(2016, 3, 27, 1)
    cet = pytz.timezone('CET').localize(naive, is_dst=False).tzinfo

    # attach the tzinfo
    assert_matches(naive.replace(tzinfo=cet),
                   Timestamp(naive).replace(tzinfo=cet))

    # attach the tzinfo, then strip it again
    assert_matches(naive.replace(tzinfo=cet).replace(tzinfo=None),
                   Timestamp(naive).replace(tzinfo=cet).replace(tzinfo=None))

def test_index_equals_with_tz(self):
left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern')
Expand Down

0 comments on commit b59f107

Please sign in to comment.