Skip to content

Commit

Permalink
API/BUG: DatetimeIndex correctly localizes integer data (pandas-dev#2…
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and david-liu-brattle-1 committed Jun 18, 2018
1 parent c18b163 commit 27f984e
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 82 deletions.
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Datetimelike API Changes
Other API Changes
^^^^^^^^^^^^^^^^^

-
- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`)
-
-

Expand Down Expand Up @@ -92,7 +92,7 @@ Datetimelike
^^^^^^^^^^^^

- Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`)
-
- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`)
-

Timedelta
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,10 @@ def astype(self, dtype, copy=True):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
try:
if is_datetime64tz_dtype(dtype):
from pandas.core.indexes.datetimes import DatetimeIndex
return DatetimeIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
return Index(self.values.astype(dtype, copy=copy), name=self.name,
dtype=dtype)
except (TypeError, ValueError):
Expand Down
82 changes: 33 additions & 49 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,57 +395,43 @@ def __new__(cls, data=None,

# data must be Index or np.ndarray here
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
is_integer_dtype(data)):
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'):
data = tools.to_datetime(data, dayfirst=dayfirst,
yearfirst=yearfirst)

if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):

if isinstance(data, DatetimeIndex):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if str(tz) != str(data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))
if isinstance(data, DatetimeIndex):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if str(tz) != str(data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))

subarr = data.values
subarr = data.values

if freq is None:
freq = data.freq
verify_integrity = False
else:
if data.dtype != _NS_DTYPE:
subarr = conversion.ensure_datetime64ns(data)
else:
subarr = data
if freq is None:
freq = data.freq
verify_integrity = False
elif issubclass(data.dtype.type, np.datetime64):
if data.dtype != _NS_DTYPE:
data = conversion.ensure_datetime64ns(data)
if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
ambiguous=ambiguous)
subarr = data.view(_NS_DTYPE)
else:
# must be integer dtype otherwise
if isinstance(data, Int64Index):
raise TypeError('cannot convert Int64Index->DatetimeIndex')
# assume this data are epoch timestamps
if data.dtype != _INT64_DTYPE:
data = data.astype(np.int64)
data = data.astype(np.int64, copy=False)
subarr = data.view(_NS_DTYPE)

if isinstance(subarr, DatetimeIndex):
if tz is None:
tz = subarr.tz
else:
if tz is not None:
tz = timezones.maybe_get_tz(tz)

if (not isinstance(data, DatetimeIndex) or
getattr(data, 'tz', None) is None):
# Convert tz-naive to UTC
ints = subarr.view('i8')
subarr = conversion.tz_localize_to_utc(ints, tz,
ambiguous=ambiguous)
subarr = subarr.view(_NS_DTYPE)

subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
if dtype is not None:
if not is_dtype_equal(subarr.dtype, dtype):
Expand Down Expand Up @@ -807,8 +793,9 @@ def _mpl_repr(self):

@cache_readonly
def _is_dates_only(self):
"""Return a boolean if we are only dates (and don't have a timezone)"""
from pandas.io.formats.format import _is_dates_only
return _is_dates_only(self.values)
return _is_dates_only(self.values) and self.tz is None

@property
def _formatter_func(self):
Expand Down Expand Up @@ -1244,7 +1231,7 @@ def join(self, other, how='left', level=None, return_indexers=False,
See Index.join
"""
if (not isinstance(other, DatetimeIndex) and len(other) > 0 and
other.inferred_type not in ('floating', 'mixed-integer',
other.inferred_type not in ('floating', 'integer', 'mixed-integer',
'mixed-integer-float', 'mixed')):
try:
other = DatetimeIndex(other)
Expand Down Expand Up @@ -2100,8 +2087,9 @@ def normalize(self):
dtype='datetime64[ns, Asia/Calcutta]', freq=None)
"""
new_values = conversion.date_normalize(self.asi8, self.tz)
return DatetimeIndex(new_values, freq='infer', name=self.name,
tz=self.tz)
return DatetimeIndex(new_values,
freq='infer',
name=self.name).tz_localize(self.tz)

@Substitution(klass='DatetimeIndex')
@Appender(_shared_docs['searchsorted'])
Expand Down Expand Up @@ -2182,8 +2170,6 @@ def insert(self, loc, item):
try:
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
self[loc:].asi8))
if self.tz is not None:
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq,
tz=self.tz)
except (AttributeError, TypeError):
Expand Down Expand Up @@ -2221,8 +2207,6 @@ def delete(self, loc):
if (loc.start in (0, None) or loc.stop in (len(self), None)):
freq = self.freq

if self.tz is not None:
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)

def tz_convert(self, tz):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/indexes/datetimes/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,16 @@ def _check_rng(rng):
_check_rng(rng_eastern)
_check_rng(rng_utc)

@pytest.mark.parametrize('tz, dtype', [
    ['US/Pacific', 'datetime64[ns, US/Pacific]'],
    [None, 'datetime64[ns]']])
def test_integer_index_astype_datetime(self, tz, dtype):
    """Casting an integer Index to a datetime dtype matches DatetimeIndex.

    The single integer is the ``.value`` of ``Timestamp('2018-01-01', tz=tz)``.
    After ``astype(dtype)`` the index must equal
    ``DatetimeIndex(['2018-01-01'], tz=tz)``, for both the tz-aware and the
    tz-naive parametrization.
    """
    # GH 20997, 20964
    stamp = pd.Timestamp('2018-01-01', tz=tz)
    casted = pd.Index([stamp.value]).astype(dtype)
    tm.assert_index_equal(casted, pd.DatetimeIndex(['2018-01-01'], tz=tz))


class TestToPeriod(object):

Expand Down
60 changes: 39 additions & 21 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import pytest
from datetime import timedelta
from operator import attrgetter
from functools import partial

import pytest
import pytz
import numpy as np
from datetime import timedelta

import pandas as pd
from pandas import offsets
Expand All @@ -26,25 +28,28 @@ def test_construction_caching(self):
freq='ns')})
assert df.dttz.dtype.tz.zone == 'US/Eastern'

def test_construction_with_alt(self):

i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern')
i2 = DatetimeIndex(i, dtype=i.dtype)
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'

i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz)
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'

i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype)
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'

i2 = DatetimeIndex(
i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz)
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'
@pytest.mark.parametrize('kwargs', [
    {'tz': 'dtype.tz'},
    {'dtype': 'dtype'},
    {'dtype': 'dtype', 'tz': 'dtype.tz'}])
def test_construction_with_alt(self, kwargs, tz_aware_fixture):
    """Rebuilding a tz-aware index with its own dtype/tz gives an equal index.

    Each value in ``kwargs`` is a dotted attribute path (e.g. ``'dtype.tz'``)
    that is resolved against the source index before being forwarded to the
    ``DatetimeIndex`` constructor.
    """
    source = pd.date_range('20130101', periods=5, freq='H',
                           tz=tz_aware_fixture)
    # Turn the attribute paths into the actual dtype / tz objects.
    resolved = {}
    for name, path in kwargs.items():
        resolved[name] = attrgetter(path)(source)
    tm.assert_index_equal(source, DatetimeIndex(source, **resolved))

@pytest.mark.parametrize('kwargs', [
{'tz': 'dtype.tz'},
{'dtype': 'dtype'},
{'dtype': 'dtype', 'tz': 'dtype.tz'}])
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
tz = tz_aware_fixture
i = pd.date_range('20130101', periods=5, freq='H', tz=tz)
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
expected = i.tz_localize(None).tz_localize('UTC').tz_convert(tz)
tm.assert_index_equal(result, expected)

# localize into the provided tz
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC')
Expand Down Expand Up @@ -478,6 +483,19 @@ def test_constructor_timestamp_near_dst(self):
ts[1].to_pydatetime()])
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('klass', [Index, DatetimeIndex])
@pytest.mark.parametrize('box', [
    np.array, partial(np.array, dtype=object), list])
@pytest.mark.parametrize('tz, dtype', [
    ['US/Pacific', 'datetime64[ns, US/Pacific]'],
    [None, 'datetime64[ns]']])
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
    """Integer data plus a datetime dtype builds the matching datetime index.

    The integer ``.value`` of ``Timestamp('2018-01-01', tz=tz)`` is wrapped
    by ``box`` (ndarray, object ndarray or list) and passed to ``klass``
    together with ``dtype``. The result must equal ``klass([ts])``.
    """
    # GH 20997, 20964
    ts = Timestamp('2018-01-01', tz=tz)
    result = klass(box([ts.value]), dtype=dtype)
    expected = klass([ts])
    # Use the index-aware assertion rather than ``assert result == expected``:
    # ``==`` is elementwise, so the bare assert only tests the truthiness of
    # a one-element boolean array and never compares dtype/tz or index class.
    tm.assert_index_equal(result, expected)


class TestTimeSeries(object):

Expand Down
27 changes: 17 additions & 10 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,26 +402,33 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
index = Index(vals)
assert isinstance(index, TimedeltaIndex)

@pytest.mark.parametrize("values", [
# pass values without timezone, as DatetimeIndex localizes it
pd.date_range('2011-01-01', periods=5).values,
pd.date_range('2011-01-01', periods=5).asi8])
@pytest.mark.parametrize("attr, utc", [
['values', False],
['asi8', True]])
@pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex])
def test_constructor_dtypes_datetime(self, tz_naive_fixture, values,
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc,
klass):
index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture)
# Test constructing with a datetimetz dtype
# .values produces numpy datetimes, so these are considered naive
# .asi8 produces integers, so these are considered epoch timestamps
index = pd.date_range('2011-01-01', periods=5)
arg = getattr(index, attr)
if utc:
index = index.tz_localize('UTC').tz_convert(tz_naive_fixture)
else:
index = index.tz_localize(tz_naive_fixture)
dtype = index.dtype

result = klass(values, tz=tz_naive_fixture)
result = klass(arg, tz=tz_naive_fixture)
tm.assert_index_equal(result, index)

result = klass(values, dtype=dtype)
result = klass(arg, dtype=dtype)
tm.assert_index_equal(result, index)

result = klass(list(values), tz=tz_naive_fixture)
result = klass(list(arg), tz=tz_naive_fixture)
tm.assert_index_equal(result, index)

result = klass(list(values), dtype=dtype)
result = klass(list(arg), dtype=dtype)
tm.assert_index_equal(result, index)

@pytest.mark.parametrize("attr", ['values', 'asi8'])
Expand Down

0 comments on commit 27f984e

Please sign in to comment.