diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6d5e40d37c8df..c29197725a2b6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -36,7 +36,7 @@ Datetimelike API Changes Other API Changes ^^^^^^^^^^^^^^^^^ -- +- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) - - @@ -92,7 +92,7 @@ Datetimelike ^^^^^^^^^^^^ - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) -- +- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) - Timedelta diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d9e4ef7db1158..36345a32a3bf7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1175,6 +1175,10 @@ def astype(self, dtype, copy=True): return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) try: + if is_datetime64tz_dtype(dtype): + from pandas.core.indexes.datetimes import DatetimeIndex + return DatetimeIndex(self.values, name=self.name, dtype=dtype, + copy=copy) return Index(self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype) except (TypeError, ValueError): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 66622814f172d..e944df7aa83c6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -395,57 +395,43 @@ def __new__(cls, data=None, # data must be Index or np.ndarray here if not (is_datetime64_dtype(data) or is_datetimetz(data) or - is_integer_dtype(data)): + is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data): - - if isinstance(data, DatetimeIndex): - if tz is None: - tz = data.tz - elif data.tz is None: - data = data.tz_localize(tz, ambiguous=ambiguous) - else: - # the tz's must match - if str(tz) != str(data.tz): - msg = ('data is already tz-aware {0}, unable to ' - 'set specified tz: {1}') - raise TypeError(msg.format(data.tz, tz)) + if isinstance(data, DatetimeIndex): + if tz is None: + tz = data.tz + elif data.tz is None: + data = data.tz_localize(tz, ambiguous=ambiguous) + else: + # the tz's must match + if str(tz) != str(data.tz): + msg = ('data is already tz-aware {0}, unable to ' + 'set specified tz: {1}') + raise TypeError(msg.format(data.tz, tz)) - subarr = data.values + subarr = data.values - if freq is None: - freq = data.freq - verify_integrity = False - else: - if data.dtype != _NS_DTYPE: - subarr = conversion.ensure_datetime64ns(data) - else: - subarr = data + if freq is None: + freq = data.freq + verify_integrity = False + elif issubclass(data.dtype.type, np.datetime64): + if data.dtype != _NS_DTYPE: + data = conversion.ensure_datetime64ns(data) + if tz is not None: + # Convert tz-naive to UTC + tz = timezones.maybe_get_tz(tz) + data = conversion.tz_localize_to_utc(data.view('i8'), tz, + ambiguous=ambiguous) + subarr = data.view(_NS_DTYPE) else: # must be integer dtype otherwise - if isinstance(data, Int64Index): - raise TypeError('cannot convert Int64Index->DatetimeIndex') + # assume this data are epoch timestamps if data.dtype != _INT64_DTYPE: - data = data.astype(np.int64) + data = data.astype(np.int64, copy=False) subarr = data.view(_NS_DTYPE) - if isinstance(subarr, DatetimeIndex): - if tz is None: - tz = subarr.tz - else: - if tz is not None: - tz = timezones.maybe_get_tz(tz) - - if (not isinstance(data, DatetimeIndex) or - getattr(data, 'tz', None) is None): - # Convert tz-naive to UTC - ints = subarr.view('i8') - subarr = conversion.tz_localize_to_utc(ints, tz, - ambiguous=ambiguous) - subarr = subarr.view(_NS_DTYPE) - subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) if dtype is not None: if not is_dtype_equal(subarr.dtype, dtype): @@ -807,8 +793,9 @@ def _mpl_repr(self): @cache_readonly def _is_dates_only(self): + """Return a boolean if we are only dates (and don't have a timezone)""" from pandas.io.formats.format import _is_dates_only - return _is_dates_only(self.values) + return _is_dates_only(self.values) and self.tz is None @property def _formatter_func(self): @@ -1244,7 +1231,7 @@ def join(self, other, how='left', level=None, return_indexers=False, See Index.join """ if (not isinstance(other, DatetimeIndex) and len(other) > 0 and - other.inferred_type not in ('floating', 'mixed-integer', + other.inferred_type not in ('floating', 'integer', 'mixed-integer', 'mixed-integer-float', 'mixed')): try: other = DatetimeIndex(other) @@ -2100,8 +2087,9 @@ def normalize(self): dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ new_values = conversion.date_normalize(self.asi8, self.tz) - return DatetimeIndex(new_values, freq='infer', name=self.name, - tz=self.tz) + return DatetimeIndex(new_values, + freq='infer', + name=self.name).tz_localize(self.tz) @Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) @@ -2182,8 +2170,6 @@ def insert(self, loc, item): try: new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) - if self.tz is not None: - new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) except (AttributeError, TypeError): @@ -2221,8 +2207,6 @@ def delete(self, loc): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq - if self.tz is not None: - new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) def tz_convert(self, tz): diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 8acdd301f241a..64b8f48f6a4e1 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -225,6 +225,16 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) + @pytest.mark.parametrize('tz, dtype', [ + ['US/Pacific', 'datetime64[ns, US/Pacific]'], + [None, 'datetime64[ns]']]) + def test_integer_index_astype_datetime(self, tz, dtype): + # GH 20997, 20964 + val = [pd.Timestamp('2018-01-01', tz=tz).value] + result = pd.Index(val).astype(dtype) + expected = pd.DatetimeIndex(['2018-01-01'], tz=tz) + tm.assert_index_equal(result, expected) + class TestToPeriod(object): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index b138b79caac76..f7682a965c038 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -1,8 +1,10 @@ -import pytest +from datetime import timedelta +from operator import attrgetter +from functools import partial +import pytest import pytz import numpy as np -from datetime import timedelta import pandas as pd from pandas import offsets @@ -26,25 +28,28 @@ def test_construction_caching(self): freq='ns')}) assert df.dttz.dtype.tz.zone == 'US/Eastern' - def test_construction_with_alt(self): - - i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') - i2 = DatetimeIndex(i, dtype=i.dtype) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' - - i2 = DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) - tm.assert_index_equal(i, i2) - assert i.tz.zone == 'US/Eastern' + @pytest.mark.parametrize('kwargs', [ + {'tz': 'dtype.tz'}, + {'dtype': 'dtype'}, + {'dtype': 'dtype', 'tz': 'dtype.tz'}]) + def test_construction_with_alt(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range('20130101', periods=5, freq='H', tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(i, result) + + @pytest.mark.parametrize('kwargs', [ + {'tz': 'dtype.tz'}, + {'dtype': 'dtype'}, + {'dtype': 'dtype', 'tz': 'dtype.tz'}]) + def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range('20130101', periods=5, freq='H', tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs) + expected = i.tz_localize(None).tz_localize('UTC').tz_convert(tz) + tm.assert_index_equal(result, expected) # localize into the provided tz i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') @@ -478,6 +483,19 @@ def test_constructor_timestamp_near_dst(self): ts[1].to_pydatetime()]) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('klass', [Index, DatetimeIndex]) + @pytest.mark.parametrize('box', [ + np.array, partial(np.array, dtype=object), list]) + @pytest.mark.parametrize('tz, dtype', [ + ['US/Pacific', 'datetime64[ns, US/Pacific]'], + [None, 'datetime64[ns]']]) + def test_constructor_with_int_tz(self, klass, box, tz, dtype): + # GH 20997, 20964 + ts = Timestamp('2018-01-01', tz=tz) + result = klass(box([ts.value]), dtype=dtype) + expected = klass([ts]) + assert result == expected + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c264f5f79e47e..b8bd218ec25ab 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -402,26 +402,33 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals): index = Index(vals) assert isinstance(index, TimedeltaIndex) - @pytest.mark.parametrize("values", [ - # pass values without timezone, as DatetimeIndex localizes it - pd.date_range('2011-01-01', periods=5).values, - pd.date_range('2011-01-01', periods=5).asi8]) + @pytest.mark.parametrize("attr, utc", [ + ['values', False], + ['asi8', True]]) @pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex]) - def test_constructor_dtypes_datetime(self, tz_naive_fixture, values, + def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc, klass): - index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture) + # Test constructing with a datetimetz dtype + # .values produces numpy datetimes, so these are considered naive + # .asi8 produces integers, so these are considered epoch timestamps + index = pd.date_range('2011-01-01', periods=5) + arg = getattr(index, attr) + if utc: + index = index.tz_localize('UTC').tz_convert(tz_naive_fixture) + else: + index = index.tz_localize(tz_naive_fixture) dtype = index.dtype - result = klass(values, tz=tz_naive_fixture) + result = klass(arg, tz=tz_naive_fixture) tm.assert_index_equal(result, index) - result = klass(values, dtype=dtype) + result = klass(arg, dtype=dtype) tm.assert_index_equal(result, index) - result = klass(list(values), tz=tz_naive_fixture) + result = klass(list(arg), tz=tz_naive_fixture) tm.assert_index_equal(result, index) - result = klass(list(values), dtype=dtype) + result = klass(list(arg), dtype=dtype) tm.assert_index_equal(result, index) @pytest.mark.parametrize("attr", ['values', 'asi8'])