From 52e4d6bbfcc1406e9f98b1f95dc1713a5cc0375c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 11 Nov 2018 20:19:27 -0800 Subject: [PATCH 1/8] implement most of the rest of EA interface --- pandas/core/arrays/datetimelike.py | 79 ++++++++++++++++++++++++++++- pandas/core/arrays/datetimes.py | 38 ++++++++++++-- pandas/core/arrays/period.py | 53 +++++-------------- pandas/core/arrays/timedeltas.py | 17 ++++++- pandas/core/indexes/datetimelike.py | 7 ++- pandas/core/indexes/datetimes.py | 16 +++--- 6 files changed, 155 insertions(+), 55 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3fa4f503d2dd5..adace2d712544 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -39,7 +39,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.common as com -from pandas.core.algorithms import checked_add_with_arr +from pandas.core.algorithms import checked_add_with_arr, take, unique1d from .base import ExtensionOpsMixin from pandas.util._decorators import deprecate_kwarg @@ -127,6 +127,10 @@ def asi8(self): # ------------------------------------------------------------------ # Array-like Methods + @property + def nbytes(self): + return self._data.nbytes + @property def shape(self): return (len(self),) @@ -192,6 +196,79 @@ def astype(self, dtype, copy=True): return self._box_values(self.asi8) return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) + # ------------------------------------------------------------------ + # ExtensionArray Interface + # TODO: + # * argsort / _values_for_argsort + # * _reduce + + def unique(self): + result = unique1d(self.asi8) + return type(self)(result, dtype=self.dtype) + + def _validate_fill_value(self, fill_value): + """ + If a fill_value is passed to `take` convert it to an i8 representation, + raising ValueError if this is not possible. + + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : np.int64 + + Raises + ------ + ValueError + """ + raise AbstractMethodError(self) + + def take(self, indices, allow_fill=False, fill_value=None): + if allow_fill: + fill_value = self._validate_fill_value(fill_value) + + new_values = take(self.asi8, + indices, + allow_fill=allow_fill, + fill_value=fill_value) + + return type(self)(new_values, dtype=self.dtype) + + @classmethod + def _concat_same_type(cls, to_concat): + # for TimedeltaArray and PeriodArray; DatetimeArray overrides + freqs = {x.freq for x in to_concat} + assert len(freqs) == 1 + freq = list(freqs)[0] + values = np.concatenate([x.asi8 for x in to_concat]) + return cls._simple_new(values, freq=freq) + + def copy(self, deep=False): + values = self.asi8 + if deep: + values = i8values.copy() + return type(self)(values, dtype=self.dtype, freq=self.freq) + + def _values_for_factorize(self): + return self.asi8, iNaT + + @classmethod + def _from_factorized(cls, values, original): + if is_datetime64tz_dtype(original): + return cls(values, tz=original.tz, freq=original.freq) + return cls(values, freq=original.freq) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + arr = np.asarray(scalars, dtype=object) + if copy: + arr = arr.copy() + + # If necessary this will infer tz from dtype + return cls(arr, dtype=dtype) + # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b0485cc82f07f..26ea4778fc1c1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -12,7 +12,7 @@ conversion, fields, timezones, resolution as libresolution) -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import cache_readonly, Appender from pandas.errors import PerformanceWarning from pandas import compat @@ -294,7 +294,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if tz is not None and index.tz is None: arr = conversion.tz_localize_to_utc( - ensure_int64(index.values), + index.asi8, tz, ambiguous=ambiguous) index = cls(arr) @@ -317,7 +317,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if not right_closed and len(index) and index[-1] == end: index = index[:-1] - return cls._simple_new(index.values, freq=freq, tz=tz) + return cls._simple_new(index.asi8, freq=freq, tz=tz) # ----------------------------------------------------------------- # Descriptive Properties @@ -419,6 +419,38 @@ def __iter__(self): for v in converted: yield v + # ---------------------------------------------------------------- + # ExtensionArray Interface + + @property + def _ndarray_values(self): + return self._data + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (datetime, np.datetime64)): + self._assert_tzawareness_compat(fill_value) + fill_value = Timestamp(fill_value).value + else: + raise ValueError("'fill_value' should be a Timestamp. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + + @classmethod + def _concat_same_type(cls, to_concat): + freqs = {x.freq for x in to_concat} + assert len(freqs) == 1 + freq = list(freqs)[0] + + tzs = {x.tz for x in to_concat} + assert len(tzs) == 1 + tz = list(tzs)[0] + + values = np.concatenate([x.asi8 for x in to_concat]) + return cls._simple_new(values, freq=freq, tz=tz) + # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 482968fdb4766..5ecc027a6dc77 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -216,14 +216,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): ordinals = libperiod.extract_ordinals(periods, freq) return cls(ordinals, freq=freq) - def _values_for_factorize(self): - return self.asi8, iNaT - - @classmethod - def _from_factorized(cls, values, original): - # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray - return cls(values, freq=original.freq) - @classmethod def _from_datetime64(cls, data, freq, tz=None): """Construct a PeriodArray from a datetime64 array @@ -262,14 +254,6 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq - @classmethod - def _concat_same_type(cls, to_concat): - freq = {x.freq for x in to_concat} - assert len(freq) == 1 - freq = list(freq)[0] - values = np.concatenate([x._data for x in to_concat]) - return cls(values, freq=freq) - # -------------------------------------------------------------------- # Data / Attributes @property @@ -379,29 +363,20 @@ def __setitem__( raise TypeError(msg) self._data[key] = value - def take(self, indices, allow_fill=False, fill_value=None): - if allow_fill: - if isna(fill_value): - fill_value = iNaT - elif isinstance(fill_value, Period): - if self.freq != fill_value.freq: - msg = DIFFERENT_FREQ_INDEX.format( - self.freq.freqstr, - fill_value.freqstr - ) - raise IncompatibleFrequency(msg) - - fill_value = fill_value.ordinal - else: - msg = "'fill_value' should be a Period. Got '{}'." - raise ValueError(msg.format(fill_value)) - - new_values = algos.take(self._data, - indices, - allow_fill=allow_fill, - fill_value=fill_value) - - return type(self)(new_values, self.freq) + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, Period): + if fill_value.freq != self.freq: + msg = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr, + fill_value.freqstr) + raise IncompatibleFrequency(msg) + fill_value = fill_value.ordinal + else: + raise ValueError("'fill_value' should be a Period. " + "Got '{got}'.".format(got=fill_value)) + return fill_value def fillna(self, value=None, method=None, limit=None): # TODO(#20300) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1f78e0c00bf00..515e3dbec26b5 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -9,6 +9,7 @@ from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, parse_timedelta_unit) +from pandas.util._decorators import Appender from pandas import compat @@ -139,7 +140,7 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result._freq = freq return result - def __new__(cls, values, freq=None): + def __new__(cls, values, freq=None, dtype=_TD_DTYPE): freq, freq_infer = dtl.maybe_infer_freq(freq) @@ -190,6 +191,20 @@ def _generate_range(cls, start, end, periods, freq, closed=None): return cls._simple_new(index, freq=freq) + # ---------------------------------------------------------------- + # ExtensionArray Interface + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (timedelta, np.timedelta64, Tick)): + fill_value = Timedelta(fill_value).value + else: + raise ValueError("'fill_value' should be a Timedelta. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 59429488a7c2f..f26846fc81fca 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -225,6 +225,11 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'): class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): """ common ops mixin to support a unified interface datetimelike Index """ + # override DatetimeLikeArrayMixin method + copy = Index.copy + unique = Index.unique + take = Index.take + # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index # subclasses bc they are immutable @@ -771,7 +776,7 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): try: return np.array(other, copy=False).view('i8') except TypeError: - # period array cannot be coerces to int + # period array cannot be coerced to int other = Index(other) return other.asi8 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c82cff19573e3..39ffaf8811842 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -568,16 +568,12 @@ def snap(self, freq='S'): # TODO: what about self.name? if so, use shallow_copy? def unique(self, level=None): - # Override here since IndexOpsMixin.unique uses self._values.unique - # For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error - # So we extract the tz-naive DatetimeIndex, unique that, and wrap the - # result with out TZ. - if self.tz is not None: - naive = type(self)(self._ndarray_values, copy=False) - else: - naive = self - result = super(DatetimeIndex, naive).unique(level=level) - return self._shallow_copy(result.values) + if level is not None: + self._validate_index_level(level) + + # call DatetimeArray method + result = DatetimeArrayMixin.unique(self) + return self._shallow_copy(result._data) def union(self, other): """ From 45c8161b2977912a16f8390e60fa3ccbc5f914e2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 12 Nov 2018 10:20:41 -0800 Subject: [PATCH 2/8] implement some tests for take, concat_same_type --- pandas/core/arrays/datetimelike.py | 16 +----- pandas/tests/arrays/test_datetimelike.py | 64 ++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4c18bf063dc98..9f41c90dda838 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -127,10 +127,6 @@ def asi8(self): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods - @property - def nbytes(self): - return self._data.nbytes - @property def nbytes(self): return self._data.nbytes @@ -203,6 +199,7 @@ def astype(self, dtype, copy=True): # ------------------------------------------------------------------ # ExtensionArray Interface # TODO: + # * _from_sequence # * argsort / _values_for_argsort # * _reduce @@ -252,7 +249,7 @@ def _concat_same_type(cls, to_concat): def copy(self, deep=False): values = self.asi8 if deep: - values = i8values.copy() + values = values.copy() return type(self)(values, dtype=self.dtype, freq=self.freq) def _values_for_factorize(self): @@ -264,15 +261,6 @@ def _from_factorized(cls, values, original): return cls(values, tz=original.tz, freq=original.freq) return cls(values, freq=original.freq) - @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): - arr = np.asarray(scalars, dtype=object) - if copy: - arr = arr.copy() - - # If necessary this will infer tz from dtype - return cls(arr, dtype=dtype) - # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index bb4022c9cac9a..56379c15c8312 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -175,6 +175,41 @@ def test_int_properties(self, datetime_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, datetime_index, tz_naive_fixture): + dti = datetime_index.tz_localize(tz_naive_fixture) + arr = DatetimeArray(dti) + + now = pd.Timestamp.now().tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + with pytest.raises(ValueError): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=pd.Period('2014Q1')) + + tz = None if dti.tz is not None else 'US/Eastern' + now = pd.Timestamp.now().tz_localize(tz) + with pytest.raises(TypeError): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) + + def test_concat_same_type_invalid(self, datetime_index): + # different timezones + dti = datetime_index + arr = DatetimeArray(dti) + + if arr.tz is None: + other = arr.tz_localize('UTC') + else: + other = arr.tz_localize(None) + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + class TestTimedeltaArray(object): def test_from_tdi(self): @@ -223,6 +258,35 @@ def test_int_properties(self, timedelta_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + td1 = pd.Timedelta(days=1) + result = arr.take([-1, 1], allow_fill=True, fill_value=td1) + assert result[0] == td1 + + now = pd.Timestamp.now() + with pytest.raises(ValueError): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D')) + + def test_concat_same_type_invalid(self, timedelta_index): + # different freqs + tdi = timedelta_index + arr = TimedeltaArray(tdi) + other = pd.timedelta_range('1D', periods=5, freq='2D') + # FIXME: TimedeltaArray should inherit freq='2D' without specifying it + other = TimedeltaArray(other, freq='2D') + assert other.freq != arr.freq + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + class TestPeriodArray(object): From 75f69442b016b21c4598518d3e6f9d97bb86c01a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 12 Nov 2018 12:24:09 -0800 Subject: [PATCH 3/8] Fixup typo, implement more tests --- pandas/core/indexes/datetimes.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 72 +++++++++++++++++++++++- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1d13852f18978..0ab6c95fcdd8f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -555,7 +555,7 @@ def unique(self, level=None): self._validate_index_level(level) # call DatetimeArray method - result = DatetimeArrayMixin.unique(self) + result = DatetimeArray.unique(self) return self._shallow_copy(result._data) def union(self, other): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 56379c15c8312..ab0852fc6cc6e 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -56,7 +56,68 @@ def timedelta_index(request): return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) -class TestDatetimeArray(object): +class SharedTests(object): + index_cls = None + + def test_take(self): + data = np.arange(100, dtype='i8') + np.random.shuffle(data) + + idx = self.index_cls._simple_new(data, freq='D') + arr = self.array_cls(idx) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_take_fill(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D') + arr = self.array_cls(idx) + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT) + assert result[0] is pd.NaT + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2.0) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, + fill_value=pd.Timestamp.now().time) + + def test_concat_same_type(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT) + arr = self.array_cls(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + + tm.assert_index_equal(self.index_cls(result), expected) + + +class TestDatetimeArray(SharedTests): + index_cls = pd.DatetimeIndex + array_cls = DatetimeArray def test_array_object_dtype(self, tz_naive_fixture): # GH#23524 @@ -211,7 +272,10 @@ def test_concat_same_type_invalid(self, datetime_index): arr._concat_same_type([arr, other]) -class TestTimedeltaArray(object): +class TestTimedeltaArray(SharedTests): + index_cls = pd.TimedeltaIndex + array_cls = TimedeltaArray + def test_from_tdi(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) arr = TimedeltaArray(tdi) @@ -288,7 +352,9 @@ def test_concat_same_type_invalid(self, timedelta_index): arr._concat_same_type([arr, other]) -class TestPeriodArray(object): +class TestPeriodArray(SharedTests): + index_cls = pd.PeriodIndex + array_cls = PeriodArray def test_from_pi(self, period_index): pi = period_index From 0fb5029a176f9802d4e4e4b41be32bb197eefa12 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 12 Nov 2018 12:25:36 -0800 Subject: [PATCH 4/8] whitespace fixup --- pandas/tests/arrays/test_datetimelike.py | 36 ++++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index ab0852fc6cc6e..2eaa5fe72a324 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -58,60 +58,60 @@ def timedelta_index(request): class SharedTests(object): index_cls = None - + def test_take(self): data = np.arange(100, dtype='i8') np.random.shuffle(data) - + idx = self.index_cls._simple_new(data, freq='D') arr = self.array_cls(idx) - + takers = [1, 4, 94] result = arr.take(takers) expected = idx.take(takers) - + tm.assert_index_equal(self.index_cls(result), expected) - + takers = np.array([1, 4, 94]) result = arr.take(takers) expected = idx.take(takers) - + tm.assert_index_equal(self.index_cls(result), expected) - + def test_take_fill(self): data = np.arange(10, dtype='i8') - + idx = self.index_cls._simple_new(data, freq='D') arr = self.array_cls(idx) - + result = arr.take([-1, 1], allow_fill=True, fill_value=None) assert result[0] is pd.NaT - + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) assert result[0] is pd.NaT - + result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT) assert result[0] is pd.NaT - + with pytest.raises(ValueError): arr.take([0, 1], allow_fill=True, fill_value=2) - + with pytest.raises(ValueError): arr.take([0, 1], allow_fill=True, fill_value=2.0) - + with pytest.raises(ValueError): arr.take([0, 1], allow_fill=True, fill_value=pd.Timestamp.now().time) - + def test_concat_same_type(self): data = np.arange(10, dtype='i8') - + idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT) arr = self.array_cls(idx) - + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) - + tm.assert_index_equal(self.index_cls(result), expected) From 1a781abf2dd4e867e951603f7781b48071e3dfa9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 12 Nov 2018 12:38:21 -0800 Subject: [PATCH 5/8] suggested edits --- pandas/core/arrays/datetimelike.py | 13 ++++++++----- pandas/core/arrays/datetimes.py | 16 +--------------- pandas/core/indexes/datetimes.py | 1 + 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 9f41c90dda838..d5754f5b2b669 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -239,12 +239,17 @@ def take(self, indices, allow_fill=False, fill_value=None): @classmethod def _concat_same_type(cls, to_concat): - # for TimedeltaArray and PeriodArray; DatetimeArray overrides freqs = {x.freq for x in to_concat} assert len(freqs) == 1 freq = list(freqs)[0] + + # dtype captures tz for datetime64tz case + dtypes = {x.dtype for x in to_concat} + assert len(dtypes) == 1 + dtype = list(dtypes)[0] + values = np.concatenate([x.asi8 for x in to_concat]) - return cls._simple_new(values, freq=freq) + return cls(values, dtype=dtype, freq=freq) def copy(self, deep=False): values = self.asi8 @@ -257,9 +262,7 @@ def _values_for_factorize(self): @classmethod def _from_factorized(cls, values, original): - if is_datetime64tz_dtype(original): - return cls(values, tz=original.tz, freq=original.freq) - return cls(values, freq=original.freq) + return cls(values, dtype=original.dtype, freq=original.freq) # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b7d63b990206b..c1eb7e87ec8cf 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -21,8 +21,7 @@ is_object_dtype, is_int64_dtype, is_datetime64tz_dtype, - is_datetime64_dtype, - ensure_int64) + is_datetime64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries @@ -438,19 +437,6 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value - @classmethod - def _concat_same_type(cls, to_concat): - freqs = {x.freq for x in to_concat} - assert len(freqs) == 1 - freq = list(freqs)[0] - - tzs = {x.tz for x in to_concat} - assert len(tzs) == 1 - tz = list(tzs)[0] - - values = np.concatenate([x.asi8 for x in to_concat]) - return cls._simple_new(values, freq=freq, tz=tz) - # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0ab6c95fcdd8f..23446a57e7789 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -554,6 +554,7 @@ def unique(self, level=None): if level is not None: self._validate_index_level(level) + # TODO(DatetimeArray): change dispatch once inheritance is removed # call DatetimeArray method result = DatetimeArray.unique(self) return self._shallow_copy(result._data) From eceebc768b40810b35dd40aa1fca785cbc2d9639 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Nov 2018 06:39:26 -0600 Subject: [PATCH 6/8] REF: Simplify concat _concat._concat_datetimetz -> DatetimeIndex._concat_same_dtype -> DatetimeArray._concat_same_type --- pandas/core/arrays/datetimelike.py | 9 ++------ pandas/core/dtypes/concat.py | 8 +------- pandas/core/indexes/datetimelike.py | 16 ++++++++------- pandas/tests/arrays/test_datetimelike.py | 26 +++++++++++++----------- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d5754f5b2b669..762a02e865310 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -239,17 +239,12 @@ def take(self, indices, allow_fill=False, fill_value=None): @classmethod def _concat_same_type(cls, to_concat): - freqs = {x.freq for x in to_concat} - assert len(freqs) == 1 - freq = list(freqs)[0] - - # dtype captures tz for datetime64tz case dtypes = {x.dtype for x in to_concat} assert len(dtypes) == 1 dtype = list(dtypes)[0] values = np.concatenate([x.asi8 for x in to_concat]) - return cls(values, dtype=dtype, freq=freq) + return cls(values, dtype=dtype) def copy(self, deep=False): values = self.asi8 @@ -262,7 +257,7 @@ def _values_for_factorize(self): @classmethod def _from_factorized(cls, values, original): - return cls(values, dtype=original.dtype, freq=original.freq) + return cls(values, dtype=original.dtype) # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index bb4ab823069ee..ebfb41825ae0a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -476,13 +476,7 @@ def _concat_datetimetz(to_concat, name=None): all inputs must be DatetimeIndex it is used in DatetimeIndex.append also """ - # do not pass tz to set because tzlocal cannot be hashed - if len({str(x.dtype) for x in to_concat}) != 1: - raise ValueError('to_concat must have the same tz') - tz = to_concat[0].tz - # no need to localize because internal repr will not be changed - new_values = np.concatenate([x.asi8 for x in to_concat]) - return to_concat[0]._simple_new(new_values, tz=tz, name=name) + return to_concat[0]._concat_same_dtype(to_concat, name=name) def _concat_index_same_dtype(indexes, klass=None): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f824e525bd2cb..39bc7f4b85de2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -18,7 +18,6 @@ is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, is_integer, is_integer_dtype, is_list_like, is_object_dtype, is_period_dtype, is_scalar, is_string_dtype) -import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -690,17 +689,21 @@ def _concat_same_dtype(self, to_concat, name): """ attribs = self._get_attributes_dict() attribs['name'] = name + # do not pass tz to set because tzlocal cannot be hashed + if len({str(x.dtype) for x in to_concat}) != 1: + raise ValueError('to_concat must have the same tz') if not is_period_dtype(self): # reset freq attribs['freq'] = None - - if getattr(self, 'tz', None) is not None: - return _concat._concat_datetimetz(to_concat, name) + # TODO(DatetimeArray) + # - remove the .asi8 here + # - remove the _maybe_box_as_values + # - combine with the `else` block + new_data = self._concat_same_type(to_concat).asi8 else: - new_data = np.concatenate([c.asi8 for c in to_concat]) + new_data = type(self._values)._concat_same_type(to_concat) - new_data = self._maybe_box_as_values(new_data, **attribs) return self._simple_new(new_data, **attribs) def _maybe_box_as_values(self, values, **attribs): @@ -709,7 +712,6 @@ def _maybe_box_as_values(self, values, **attribs): # but others are not. When everyone is an ExtensionArray, this can # be removed. Currently used in # - sort_values - # - _concat_same_dtype return values def astype(self, dtype, copy=True): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 2eaa5fe72a324..a1242e2481fed 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -271,6 +271,20 @@ def test_concat_same_type_invalid(self, datetime_index): with pytest.raises(AssertionError): arr._concat_same_type([arr, other]) + def test_concat_same_type_different_freq(self): + # we *can* concatentate DTI with different freqs. + a = DatetimeArray(pd.date_range('2000', periods=2, freq='D', + tz='US/Central')) + b = DatetimeArray(pd.date_range('2000', periods=2, freq='H', + tz='US/Central')) + result = DatetimeArray._concat_same_type([a, b]) + expected = DatetimeArray(pd.to_datetime([ + '2000-01-01 00:00:00', '2000-01-02 00:00:00', + '2000-01-01 00:00:00', '2000-01-01 01:00:00', + ]).tz_localize("US/Central")) + + tm.assert_datetime_array_equal(result, expected) + class TestTimedeltaArray(SharedTests): index_cls = pd.TimedeltaIndex @@ -339,18 +353,6 @@ def test_take_fill_valid(self, timedelta_index): # fill_value Period invalid arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D')) - def test_concat_same_type_invalid(self, timedelta_index): - # different freqs - tdi = timedelta_index - arr = TimedeltaArray(tdi) - other = pd.timedelta_range('1D', periods=5, freq='2D') - # FIXME: TimedeltaArray should inherit freq='2D' without specifying it - other = TimedeltaArray(other, freq='2D') - assert other.freq != arr.freq - - with pytest.raises(AssertionError): - arr._concat_same_type([arr, other]) - class TestPeriodArray(SharedTests): index_cls = pd.PeriodIndex From a6065cc160092caefcb04180b0faebe505f6e05c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Nov 2018 06:44:15 -0600 Subject: [PATCH 7/8] copy --- pandas/core/arrays/datetimelike.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 762a02e865310..094c9c3df0bed 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -247,9 +247,7 @@ def _concat_same_type(cls, to_concat): return cls(values, dtype=dtype) def copy(self, deep=False): - values = self.asi8 - if deep: - values = values.copy() + values = self.asi8.copy() return type(self)(values, dtype=self.dtype, freq=self.freq) def _values_for_factorize(self): From 3cb072e7ec33c860183c8c20bd3596e0a3bff2a0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Nov 2018 06:54:32 -0600 Subject: [PATCH 8/8] deduplicate copy --- pandas/core/arrays/period.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index a4f59b670f273..e46b00da6161e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -449,9 +449,6 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def copy(self, deep=False): - return type(self)(self._data.copy(), freq=self.freq) - def value_counts(self, dropna=False): from pandas import Series, PeriodIndex