Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API/BUG: DatetimeIndex correctly localizes integer data #21216

Merged
merged 21 commits into from
Jun 14, 2018
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
9626044
BUG: Localize Index with datetime dtype and integer data correctly
mroeschke May 12, 2018
f5235eb
Adjust tests
mroeschke May 14, 2018
82fbf0c
Merge remote-tracking branch 'upstream/master' into index_datetime_int
mroeschke May 19, 2018
c95ca51
Adjust tests
mroeschke May 19, 2018
af3c615
Merge remote-tracking branch 'upstream/master' into index_datetime_int
mroeschke May 19, 2018
cc68764
Refactor processing logic
mroeschke May 20, 2018
ab00008
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke May 22, 2018
86a40a1
Adjust test
mroeschke May 22, 2018
069774b
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke May 23, 2018
74661ca
add additional comment
mroeschke May 23, 2018
de64718
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke May 26, 2018
abf3efc
adjust blank spaces
mroeschke May 26, 2018
3a8a714
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke May 30, 2018
ec4795b
Address review
mroeschke May 30, 2018
b71de82
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke Jun 2, 2018
37484ea
Adjust astype
mroeschke Jun 2, 2018
763a675
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke Jun 6, 2018
dc0a3fe
Add fixture tests, test with object data, dont copy with astype
mroeschke Jun 7, 2018
2bc293d
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke Jun 13, 2018
b9801a3
Merge remote-tracking branch 'upstream/master' into index_datetime_in…
mroeschke Jun 14, 2018
dc7e5c0
add assertion back after merge conflict
mroeschke Jun 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 32 additions & 48 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,57 +394,43 @@ def __new__(cls, data=None,

# data must be Index or np.ndarray here
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
is_integer_dtype(data)):
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'):
data = tools.to_datetime(data, dayfirst=dayfirst,
yearfirst=yearfirst)

if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):

if isinstance(data, DatetimeIndex):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if str(tz) != str(data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))
if isinstance(data, DatetimeIndex):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if str(tz) != str(data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))

subarr = data.values
subarr = data.values

if freq is None:
freq = data.freq
verify_integrity = False
else:
if data.dtype != _NS_DTYPE:
subarr = conversion.ensure_datetime64ns(data)
else:
subarr = data
if freq is None:
freq = data.freq
verify_integrity = False
elif issubclass(data.dtype.type, np.datetime64):
if data.dtype != _NS_DTYPE:
data = conversion.ensure_datetime64ns(data)
if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
ambiguous=ambiguous)
subarr = data.view(_NS_DTYPE)
else:
# must be integer dtype otherwise
if isinstance(data, Int64Index):
raise TypeError('cannot convert Int64Index->DatetimeIndex')
# assume this data are epoch timestamps
if data.dtype != _INT64_DTYPE:
data = data.astype(np.int64)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could
data = data.astype(np.int64, copy=False)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we have any tests with a non-int64 dtype, can you add something

subarr = data.view(_NS_DTYPE)

if isinstance(subarr, DatetimeIndex):
if tz is None:
tz = subarr.tz
else:
if tz is not None:
tz = timezones.maybe_get_tz(tz)

if (not isinstance(data, DatetimeIndex) or
getattr(data, 'tz', None) is None):
# Convert tz-naive to UTC
ints = subarr.view('i8')
subarr = conversion.tz_localize_to_utc(ints, tz,
ambiguous=ambiguous)
subarr = subarr.view(_NS_DTYPE)

subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
if dtype is not None:
if not is_dtype_equal(subarr.dtype, dtype):
Expand Down Expand Up @@ -806,8 +792,9 @@ def _mpl_repr(self):

@cache_readonly
def _is_dates_only(self):
"""Return a boolean if we are only dates (and don't have a timezone)"""
from pandas.io.formats.format import _is_dates_only
return _is_dates_only(self.values)
return _is_dates_only(self.values) and self.tz is None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did this need changing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After this refactor, I was getting errors where the repr didn't look correct but the i8 data was correct in the case when a tz was passed in the DatetimeIndex constructor.

_is_dates_only tries to:

return a boolean if we are only dates (and don't have a timezone)

but checks for timezones by re-passing self.values into DatetimeIndex. I am not sure the historical reason why this was the case, but checking self.tz is None fixed the repr issue (and seems to achieve what that function was trying to do in the first place)


@property
def _formatter_func(self):
Expand Down Expand Up @@ -1243,7 +1230,7 @@ def join(self, other, how='left', level=None, return_indexers=False,
See Index.join
"""
if (not isinstance(other, DatetimeIndex) and len(other) > 0 and
other.inferred_type not in ('floating', 'mixed-integer',
other.inferred_type not in ('floating', 'integer', 'mixed-integer',
'mixed-integer-float', 'mixed')):
try:
other = DatetimeIndex(other)
Expand Down Expand Up @@ -2081,8 +2068,9 @@ def normalize(self):
dtype='datetime64[ns, Asia/Calcutta]', freq=None)
"""
new_values = conversion.date_normalize(self.asi8, self.tz)
return DatetimeIndex(new_values, freq='infer', name=self.name,
tz=self.tz)
return DatetimeIndex(new_values,
freq='infer',
name=self.name).tz_localize(self.tz)

@Substitution(klass='DatetimeIndex')
@Appender(_shared_docs['searchsorted'])
Expand Down Expand Up @@ -2163,8 +2151,6 @@ def insert(self, loc, item):
try:
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
self[loc:].asi8))
if self.tz is not None:
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq,
tz=self.tz)
except (AttributeError, TypeError):
Expand Down Expand Up @@ -2202,8 +2188,6 @@ def delete(self, loc):
if (loc.start in (0, None) or loc.stop in (len(self), None)):
freq = self.freq

if self.tz is not None:
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)

def tz_convert(self, tz):
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,15 @@ def test_construction_with_alt(self):
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'

i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz)
i2 = DatetimeIndex(i.asi8, tz=i.dtype.tz)
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'

i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i actually like this construction, can you add a test that does both? (e.g. both uses and does not localize), maybe split it out and parameterize

i2 = DatetimeIndex(i.asi8, dtype=i.dtype)
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'

i2 = DatetimeIndex(
i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz)
i2 = DatetimeIndex(i.asi8, dtype=i.dtype, tz=i.dtype.tz)
tm.assert_index_equal(i, i2)
assert i.tz.zone == 'US/Eastern'

Expand Down Expand Up @@ -469,6 +468,14 @@ def test_constructor_with_non_normalized_pytz(self, tz):
result = DatetimeIndex(['2010'], tz=non_norm_tz)
assert pytz.timezone(tz) is result.tz

@pytest.mark.parametrize('klass', [Index, DatetimeIndex])
@pytest.mark.parametrize('box', [np.array, list])
def test_constructor_with_int_tz(self, klass, box):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parameterize on tz_naive_fixture

ts = Timestamp('2018-01-01', tz='US/Pacific')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add the issue number

result = klass(box([ts.value]), dtype='datetime64[ns, US/Pacific]')
expected = klass([ts])
tm.assert_index_equal(result, expected)


class TestTimeSeries(object):

Expand Down
24 changes: 14 additions & 10 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,26 +402,30 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
index = Index(vals)
assert isinstance(index, TimedeltaIndex)

@pytest.mark.parametrize("values", [
# pass values without timezone, as DatetimeIndex localizes it
pd.date_range('2011-01-01', periods=5).values,
pd.date_range('2011-01-01', periods=5).asi8])
@pytest.mark.parametrize("attr, utc", [
['values', False],
['asi8', True]])
@pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex])
def test_constructor_dtypes_datetime(self, tz_naive_fixture, values,
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc,
klass):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a bit of commentary on what is tested here

index = pd.date_range('2011-01-01', periods=5, tz=tz_naive_fixture)
index = pd.date_range('2011-01-01', periods=5)
arg = getattr(index, attr)
if utc:
index = index.tz_localize('UTC').tz_convert(tz_naive_fixture)
else:
index = index.tz_localize(tz_naive_fixture)
dtype = index.dtype

result = klass(values, tz=tz_naive_fixture)
result = klass(arg, tz=tz_naive_fixture)
tm.assert_index_equal(result, index)

result = klass(values, dtype=dtype)
result = klass(arg, dtype=dtype)
tm.assert_index_equal(result, index)

result = klass(list(values), tz=tz_naive_fixture)
result = klass(list(arg), tz=tz_naive_fixture)
tm.assert_index_equal(result, index)

result = klass(list(values), dtype=dtype)
result = klass(list(arg), dtype=dtype)
tm.assert_index_equal(result, index)

@pytest.mark.parametrize("attr", ['values', 'asi8'])
Expand Down