From e0f12606b14ae8d5469f79eecf116cd69c1da9fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 1 Jan 2019 12:07:38 -0800 Subject: [PATCH] Make DTI[tz]._values and Series[tz]._values return DTA (#24534) --- pandas/core/dtypes/concat.py | 5 ++--- pandas/core/indexes/datetimes.py | 11 ++++++++++- pandas/core/internals/blocks.py | 16 ++++++++++++---- pandas/core/series.py | 9 ++++----- pandas/tests/indexing/test_coercion.py | 2 +- pandas/tests/test_base.py | 12 ++++-------- 6 files changed, 33 insertions(+), 22 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index a90cfa4e4c906e..0501889d743d4c 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -426,8 +426,7 @@ def _concat_datetime(to_concat, axis=0, typs=None): if any(typ.startswith('datetime') for typ in typs): if 'datetime' in typs: - to_concat = [np.array(x, copy=False).view(np.int64) - for x in to_concat] + to_concat = [x.astype(np.int64, copy=False) for x in to_concat] return _concatenate_2d(to_concat, axis=axis).view(_NS_DTYPE) else: # when to_concat has different tz, len(typs) > 1. @@ -451,7 +450,7 @@ def _convert_datetimelike_to_object(x): # if dtype is of datetimetz or timezone if x.dtype.kind == _NS_DTYPE.kind: if getattr(x, 'tz', None) is not None: - x = x.astype(object).values + x = np.asarray(x.astype(object)) else: shape = x.shape x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5ed8bd45a6affb..5695d3d9e67f31 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -316,6 +316,12 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ + if isinstance(values, DatetimeArray): + values = DatetimeArray(values, freq=freq, tz=tz, dtype=dtype) + tz = values.tz + freq = values.freq + values = values._data + # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes assert isinstance(values, np.ndarray), type(values) @@ -340,7 +346,7 @@ def _values(self): # tz-naive -> ndarray # tz-aware -> DatetimeIndex if self.tz is not None: - return self + return self._eadata else: return self.values @@ -629,6 +635,9 @@ def intersection(self, other): not other.freq.isAnchored() or (not self.is_monotonic or not other.is_monotonic)): result = Index.intersection(self, other) + # Invalidate the freq of `result`, which may not be correct at + # this point, depending on the values. + result.freq = None result = self._shallow_copy(result._values, name=result.name, tz=result.tz, freq=None) if result.freq is None: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 375b4ccbc122fc..c9ed2521676ad2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -34,7 +34,8 @@ _isna_compat, array_equivalent, is_null_datelike_scalar, isna, notna) import pandas.core.algorithms as algos -from pandas.core.arrays import Categorical, ExtensionArray +from pandas.core.arrays import ( + Categorical, DatetimeArrayMixin as DatetimeArray, ExtensionArray) from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexes.datetimes import DatetimeIndex @@ -2437,8 +2438,14 @@ def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ if isinstance(other, ABCDatetimeIndex): - # to store DatetimeTZBlock as object - other = other.astype(object).values + # May get a DatetimeIndex here. Unbox it. + other = other.array + + if isinstance(other, DatetimeArray): + # hit in pandas/tests/indexing/test_coercion.py + # ::TestWhereCoercion::test_where_series_datetime64[datetime64tz] + # when falling back to ObjectBlock.where + other = other.astype(object) return values, other @@ -2985,7 +2992,8 @@ def _try_coerce_args(self, values, other): elif (is_null_datelike_scalar(other) or (lib.is_scalar(other) and isna(other))): other = tslibs.iNaT - elif isinstance(other, self._holder): + elif isinstance(other, (self._holder, DatetimeArray)): + # TODO: DatetimeArray check will be redundant after GH#24024 if other.tz != self.values.tz: raise ValueError("incompatible or non tz-aware value") other = _block_shape(other.asi8, ndim=self.ndim) diff --git a/pandas/core/series.py b/pandas/core/series.py index 672fa2edb00bae..3637081e09f8cc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -477,7 +477,10 @@ def _values(self): """ Return the internal repr of this data. """ - return self._data.internal_values() + result = self._data.internal_values() + if isinstance(result, DatetimeIndex): + result = result._eadata + return result def _formatting_values(self): """ @@ -1602,10 +1605,6 @@ def unique(self): Categories (3, object): [a < b < c] """ result = super(Series, self).unique() - if isinstance(result, DatetimeIndex): - # TODO: This should be unnecessary after Series._values returns - # DatetimeArray - result = result._eadata return result def drop_duplicates(self, keep='first', inplace=False): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 29b60d80750b21..280db3b2b30044 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -31,7 +31,7 @@ def has_test(combo): for combo in combos: if not has_test(combo): msg = 'test method is not defined: {0}, {1}' - raise AssertionError(msg.format(type(cls), combo)) + raise AssertionError(msg.format(cls.__name__, combo)) yield diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 50db4f67cc3cff..f941f2ff32fa18 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -17,12 +17,12 @@ PeriodIndex, Timedelta, IntervalIndex, Interval, CategoricalIndex, Timestamp, DataFrame, Panel) from pandas.core.arrays import ( + PandasArray, DatetimeArrayMixin as DatetimeArray, TimedeltaArrayMixin as TimedeltaArray, ) from pandas.compat import StringIO, PYPY, long from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.arrays import PandasArray from pandas.core.accessor import PandasDelegate from pandas.core.base import PandasObject, NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -388,11 +388,9 @@ def test_value_counts_unique_nunique(self): for r in result: assert isinstance(r, Timestamp) - # TODO(#24024) once orig._values returns DTA, remove - # the `._eadata` below tm.assert_numpy_array_equal( result.astype(object), - orig._values._eadata.astype(object)) + orig._values.astype(object)) else: tm.assert_numpy_array_equal(result, orig.values) @@ -418,9 +416,7 @@ def test_value_counts_unique_nunique_null(self): else: o = o.copy() o[0:2] = iNaT - # TODO(#24024) once Series._values returns DTA, remove - # the `._eadata` here - values = o._values._eadata + values = o._values elif needs_i8_conversion(o): values[0:2] = iNaT @@ -1158,7 +1154,7 @@ def test_iter_box(self): (np.array(['a', 'b']), np.ndarray, 'object'), (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), - (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, + (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), DatetimeArray, 'datetime64[ns, US/Central]'), (pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), (pd.PeriodIndex([2018, 2019], freq='A'), pd.core.arrays.PeriodArray,