From 7c4efe09b7803967f75656f856138c732a888884 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 11 Jan 2019 08:05:17 -0600 Subject: [PATCH] DEPR/API: Revert handling of i8values to DatetimeIndex (#24708) --- doc/source/whatsnew/v0.24.0.rst | 47 ++++++++++++++++++- pandas/core/arrays/datetimes.py | 43 +++++++++++++++-- pandas/core/indexes/base.py | 10 +++- pandas/core/indexes/datetimes.py | 3 +- pandas/core/reshape/tile.py | 5 +- pandas/tests/dtypes/test_common.py | 14 +++--- pandas/tests/indexes/datetimes/test_astype.py | 4 +- .../indexes/datetimes/test_construction.py | 37 +++++++++++++-- pandas/tests/indexes/multi/test_integrity.py | 4 +- pandas/tests/indexes/test_base.py | 33 +++++++++---- pandas/tests/resample/test_period_index.py | 6 ++- pandas/tests/test_base.py | 4 ++ 12 files changed, 179 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index dd06bade2a203..3950ff3c8863d 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1235,7 +1235,6 @@ Datetimelike API Changes - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) - :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) -- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) - :meth:`PeriodIndex.tz_convert` and :meth:`PeriodIndex.tz_localize` have been removed (:issue:`21781`) .. _whatsnew_0240.api.other: @@ -1353,6 +1352,52 @@ the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`). dti + pd.Index([1 * dti.freq, 2 * dti.freq]) +.. 
_whatsnew_0240.deprecations.integer_tz: + +Passing Integer data and a timezone to DatetimeIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The behavior of :class:`DatetimeIndex` when passed integer data and +a timezone is changing in a future version of pandas. Previously, these +were interpreted as wall times in the desired timezone. In the future, +these will be interpreted as wall times in UTC, which are then converted +to the desired timezone (:issue:`24559`). + +The default behavior remains the same, but issues a warning: + +.. code-block:: ipython + + In [3]: pd.DatetimeIndex([946684800000000000], tz="US/Central") + /bin/ipython:1: FutureWarning: + Passing integer-dtype data and a timezone to DatetimeIndex. Integer values + will be interpreted differently in a future version of pandas. Previously, + these were viewed as datetime64[ns] values representing the wall time + *in the specified timezone*. In the future, these will be viewed as + datetime64[ns] values representing the wall time *in UTC*. This is similar + to a nanosecond-precision UNIX epoch. To accept the future behavior, use + + pd.to_datetime(integer_data, utc=True).tz_convert(tz) + + To keep the previous behavior, use + + pd.to_datetime(integer_data).tz_localize(tz) + + #!/bin/python3 + Out[3]: DatetimeIndex(['2000-01-01 00:00:00-06:00'], dtype='datetime64[ns, US/Central]', freq=None) + +As the warning message explains, opt in to the future behavior by specifying that +the integer values are UTC, and then converting to the final timezone: + +.. ipython:: python + + pd.to_datetime([946684800000000000], utc=True).tz_convert('US/Central') + +The old behavior can be retained by localizing directly to the final timezone: + +.. ipython:: python + + pd.to_datetime([946684800000000000]).tz_localize('US/Central') + .. 
_whatsnew_0240.deprecations.tz_aware_array: Converting Timezone-Aware Series and Index to NumPy Arrays diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d2d9fcf954fe3..a2d67efbecbba 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -33,6 +33,21 @@ from pandas.tseries.offsets import Day, Tick _midnight = time(0, 0) +# TODO(GH-24559): Remove warning, int_as_wall_time parameter. +_i8_message = """ + Passing integer-dtype data and a timezone to DatetimeIndex. Integer values + will be interpreted differently in a future version of pandas. Previously, + these were viewed as datetime64[ns] values representing the wall time + *in the specified timezone*. In the future, these will be viewed as + datetime64[ns] values representing the wall time *in UTC*. This is similar + to a nanosecond-precision UNIX epoch. To accept the future behavior, use + + pd.to_datetime(integer_data, utc=True).tz_convert(tz) + + To keep the previous behavior, use + + pd.to_datetime(integer_data).tz_localize(tz) +""" def tz_to_dtype(tz): @@ -342,13 +357,15 @@ def _simple_new(cls, values, freq=None, dtype=_NS_DTYPE): @classmethod def _from_sequence(cls, data, dtype=None, copy=False, tz=None, freq=None, - dayfirst=False, yearfirst=False, ambiguous='raise'): + dayfirst=False, yearfirst=False, ambiguous='raise', + int_as_wall_time=False): freq, freq_infer = dtl.maybe_infer_freq(freq) subarr, tz, inferred_freq = sequence_to_dt64ns( data, dtype=dtype, copy=copy, tz=tz, - dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous) + dayfirst=dayfirst, yearfirst=yearfirst, + ambiguous=ambiguous, int_as_wall_time=int_as_wall_time) freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) @@ -1649,7 +1666,8 @@ def to_julian_date(self): def sequence_to_dt64ns(data, dtype=None, copy=False, tz=None, - dayfirst=False, yearfirst=False, ambiguous='raise'): + dayfirst=False, yearfirst=False, ambiguous='raise', + 
int_as_wall_time=False): """ Parameters ---------- @@ -1661,6 +1679,13 @@ def sequence_to_dt64ns(data, dtype=None, copy=False, yearfirst : bool, default False ambiguous : str, bool, or arraylike, default 'raise' See pandas._libs.tslibs.conversion.tz_localize_to_utc + int_as_wall_time : bool, default False + Whether to treat ints as wall time in specified timezone, or as + nanosecond-precision UNIX epoch (wall time in UTC). + This is used in DatetimeIndex.__init__ to deprecate the wall-time + behaviour. + + ..versionadded:: 0.24.0 Returns ------- @@ -1717,6 +1742,10 @@ def sequence_to_dt64ns(data, dtype=None, copy=False, data, inferred_tz = objects_to_datetime64ns( data, dayfirst=dayfirst, yearfirst=yearfirst) tz = maybe_infer_tz(tz, inferred_tz) + # When a sequence of timestamp objects is passed, we always + # want to treat the (now i8-valued) data as UTC timestamps, + # not wall times. + int_as_wall_time = False # `data` may have originally been a Categorical[datetime64[ns, tz]], # so we need to handle these types. 
@@ -1744,8 +1773,16 @@ def sequence_to_dt64ns(data, dtype=None, copy=False, else: # must be integer dtype otherwise # assume this data are epoch timestamps + if tz: + tz = timezones.maybe_get_tz(tz) + if data.dtype != _INT64_DTYPE: data = data.astype(np.int64, copy=False) + if int_as_wall_time and tz is not None and not timezones.is_utc(tz): + warnings.warn(_i8_message, FutureWarning, stacklevel=4) + data = conversion.tz_localize_to_utc(data.view('i8'), tz, + ambiguous=ambiguous) + data = data.view(_NS_DTYPE) result = data.view(_NS_DTYPE) if copy: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c853a30c0de79..99114b7dcf34d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -22,7 +22,8 @@ is_dtype_union_equal, is_extension_array_dtype, is_float, is_float_dtype, is_hashable, is_integer, is_integer_dtype, is_interval_dtype, is_iterator, is_list_like, is_object_dtype, is_period_dtype, is_scalar, - is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype) + is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, + pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDateOffset, ABCDatetimeArray, ABCIndexClass, @@ -732,6 +733,13 @@ def astype(self, dtype, copy=True): from .category import CategoricalIndex return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) + elif is_datetime64tz_dtype(dtype): + # TODO(GH-24559): Remove this block, use the following elif. + # avoid FutureWarning from DatetimeIndex constructor. 
+ from pandas import DatetimeIndex + tz = pandas_dtype(dtype).tz + return (DatetimeIndex(np.asarray(self)) + .tz_localize("UTC").tz_convert(tz)) elif is_extension_array_dtype(dtype): return Index(np.asarray(self), dtype=dtype, copy=copy) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 664ca9c5d2f05..a4e058160e567 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -299,7 +299,8 @@ def __new__(cls, data=None, dtarr = DatetimeArray._from_sequence( data, dtype=dtype, copy=copy, tz=tz, freq=freq, - dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous) + dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous, + int_as_wall_time=True) subarr = cls._simple_new(dtarr, name=name, freq=dtarr.freq, tz=dtarr.tz) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 15df0ca2442fa..c107ed51226b0 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -449,7 +449,10 @@ def _convert_bin_to_datelike_type(bins, dtype): bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is datelike """ - if is_datetime64tz_dtype(dtype) or is_datetime_or_timedelta_dtype(dtype): + if is_datetime64tz_dtype(dtype): + bins = to_datetime(bins.astype(np.int64), + utc=True).tz_convert(dtype.tz) + elif is_datetime_or_timedelta_dtype(dtype): bins = Index(bins.astype(np.int64), dtype=dtype) return bins diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index f0f77b4977610..62e96fd39a759 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -209,8 +209,8 @@ def test_is_datetime64tz_dtype(): assert not com.is_datetime64tz_dtype(object) assert not com.is_datetime64tz_dtype([1, 2, 3]) assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) - assert com.is_datetime64tz_dtype(pd.DatetimeIndex( - [1, 2, 3], tz="US/Eastern")) + assert com.is_datetime64tz_dtype(pd.DatetimeIndex(['2000'], + 
tz="US/Eastern")) def test_is_timedelta64_dtype(): @@ -286,7 +286,7 @@ def test_is_datetimelike(): assert com.is_datetimelike(pd.PeriodIndex([], freq="A")) assert com.is_datetimelike(np.array([], dtype=np.datetime64)) assert com.is_datetimelike(pd.Series([], dtype="timedelta64[ns]")) - assert com.is_datetimelike(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + assert com.is_datetimelike(pd.DatetimeIndex(["2000"], tz="US/Eastern")) dtype = DatetimeTZDtype("ns", tz="US/Eastern") s = pd.Series([], dtype=dtype) @@ -480,7 +480,7 @@ def test_needs_i8_conversion(): assert com.needs_i8_conversion(np.datetime64) assert com.needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) assert com.needs_i8_conversion(pd.DatetimeIndex( - [1, 2, 3], tz="US/Eastern")) + ["2000"], tz="US/Eastern")) def test_is_numeric_dtype(): @@ -541,7 +541,7 @@ def test_is_extension_type(check_scipy): assert com.is_extension_type(pd.Series(cat)) assert com.is_extension_type(pd.SparseArray([1, 2, 3])) assert com.is_extension_type(pd.SparseSeries([1, 2, 3])) - assert com.is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + assert com.is_extension_type(pd.DatetimeIndex(['2000'], tz="US/Eastern")) dtype = DatetimeTZDtype("ns", tz="US/Eastern") s = pd.Series([], dtype=dtype) @@ -635,8 +635,8 @@ def test__get_dtype_fails(input_param): (pd.DatetimeIndex([1, 2]), np.datetime64), (pd.DatetimeIndex([1, 2]).dtype, np.datetime64), ('