pandas-dev · jbrockmendel · Oct 30, 2018 · Oct 31, 2018 · Oct 31, 2018 · Nov 2, 2018
diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -2,7 +2,7 @@
                    ExtensionOpsMixin,
                    ExtensionScalarOpsMixin)
 from .categorical import Categorical  # noqa
-from .datetimes import DatetimeArrayMixin  # noqa
+from .datetimes import DatetimeArray  # noqa
 from .interval import IntervalArray  # noqa
 from .period import PeriodArray, period_array  # noqa
 from .timedeltas import TimedeltaArrayMixin  # noqa

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -39,7 +39,7 @@
 from pandas.core.dtypes.missing import isna
 
 import pandas.core.common as com
-from pandas.core.algorithms import checked_add_with_arr
+from pandas.core.algorithms import checked_add_with_arr, take
 
 from .base import ExtensionOpsMixin
 from pandas.util._decorators import deprecate_kwarg
@@ -127,6 +127,10 @@ def asi8(self):
     # ------------------------------------------------------------------
     # Array-like Methods
 
+    @property
+    def nbytes(self):
+        return self.asi8.nbytes
+
     @property
     def shape(self):
         return (len(self),)
@@ -192,6 +196,107 @@ def astype(self, dtype, copy=True):
             return self._box_values(self.asi8)
         return super(DatetimeLikeArrayMixin, self).astype(dtype, copy)
 
+    # ------------------------------------------------------------------
+    # ExtensionArray Interface
+    # isna
+    # __getitem__
+    # __len__
+    # nbytes
+    # take
+    # _concat_same_type
+    # copy
+    # _from_factorized
+    # factorize / _values_for_factorize
+    # _from_sequence
+    # unique
+    #
+    # dtype
+    #
+    # dropna
+    #
+    #* _formatting_values
+    #* fillna
+    #* argsort / _values_for_argsort
+    #* _reduce
+
+    def unique(self):
+        # Deduplicate on the i8 values, then pass them to _shallow_copy.
+        from pandas.core.algorithms import unique1d
+        return self._shallow_copy(unique1d(self.asi8))
+
+    def _validate_fill_value(self, fill_value):
+        """
+        If a fill_value is passed to `take` convert it to an i8 representation,
+        raising ValueError if this is not possible.
+
+        Parameters
+        ----------
+        fill_value : object
+
+        Returns
+        -------
+        fill_value : np.int64
+
+        Raises
+        ------
+        ValueError
+        """
+        raise AbstractMethodError(self)
+
+    def take(self, indices, allow_fill=False, fill_value=None):
+        # Operates on the i8 values; with allow_fill the fill value is first
+        # converted by _validate_fill_value.
+        if allow_fill:
+            fill_value = self._validate_fill_value(fill_value)
+        taken = take(self.asi8, indices,
+                     allow_fill=allow_fill, fill_value=fill_value)
+
+        # TODO: use "infer"?  Why does omitting freq cause
+        #  failures in py37 but not py27?
+        if is_period_dtype(self):
+            return self._shallow_copy(taken, freq=self.freq)
+        return self._shallow_copy(taken, freq=None)
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        # TimedeltaArray/PeriodArray path (DatetimeArray overrides): one freq
+        unique_freqs = {arr.freq for arr in to_concat}
+        assert len(unique_freqs) == 1
+        shared_freq = list(unique_freqs)[0]
+        i8_parts = [arr.asi8 for arr in to_concat]
+        return cls._simple_new(np.concatenate(i8_parts), freq=shared_freq)
+
+    def copy(self, deep=False):
+        # TODO: should `deep` determine whether we copy self.asi8?
+        # NOTE: `deep` is currently ignored; asi8 is always copied.
+        tz_kw = {'tz': self.tz} if is_datetime64tz_dtype(self) else {}
+        return type(self)(self.asi8.copy(), freq=self.freq, **tz_kw)
+
+    # Follows PeriodArray: no dtype, or our own class, returns self.
+    # `type` is ignored (TODO); it shadows the builtin, so use __class__.
+    def view(self, dtype=None, type=None):
+        if dtype is None or dtype is self.__class__:
+            return self
+        return self._ndarray_values.view(dtype=dtype)
+
+    def _values_for_factorize(self):
+        return self.asi8, iNaT
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        # Reuse the original's freq, and its tz when it is tz-aware.
+        tz_kw = {'tz': original.tz} if is_datetime64tz_dtype(original) else {}
+        return cls(values, freq=original.freq, **tz_kw)
+
+    @classmethod
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        # Coerce to an object ndarray, duplicating it only when `copy` is
+        # requested, then hand off to the constructor.
+        objs = np.asarray(scalars, dtype=object)
+
+        # If necessary this will infer tz from dtype
+        return cls(objs.copy() if copy else objs, dtype=dtype)
+
     # ------------------------------------------------------------------
     # Null Handling
 
@@ -736,8 +841,8 @@ def __rsub__(self, other):
                 # we need to wrap in DatetimeArray/Index and flip the operation
                 if not isinstance(other, DatetimeLikeArrayMixin):
                     # Avoid down-casting DatetimeIndex
-                    from pandas.core.arrays import DatetimeArrayMixin
-                    other = DatetimeArrayMixin(other)
+                    from pandas.core.arrays import DatetimeArray
+                    other = DatetimeArray(other)
                 return other - self
             elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and
                   not is_datetime64_any_dtype(other)):

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -12,7 +12,7 @@
     conversion, fields, timezones,
     resolution as libresolution)
 
-from pandas.util._decorators import cache_readonly
+from pandas.util._decorators import cache_readonly, Appender
 from pandas.errors import PerformanceWarning
 from pandas import compat
 
@@ -34,6 +34,7 @@
 from pandas.tseries.offsets import Tick, generate_range
 
 from pandas.core.arrays import datetimelike as dtl
+from pandas.core.arrays.base import ExtensionArray
 
 
 _midnight = time(0, 0)
@@ -122,7 +123,7 @@ def wrapper(self, other):
                 except ValueError:
                     other = np.array(other, dtype=np.object_)
             elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries,
-                                        DatetimeArrayMixin)):
+                                        DatetimeArray)):
                 # Following Timestamp convention, __eq__ is all-False
                 # and __ne__ is all True, others raise TypeError.
                 return ops.invalid_comparison(self, other, op)
@@ -158,7 +159,7 @@ def wrapper(self, other):
     return compat.set_function_name(wrapper, opname, cls)
 
 
-class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin):
+class DatetimeArray(dtl.DatetimeLikeArrayMixin, ExtensionArray):
     """
     Assumes that subclass __new__/__init__ defines:
         tz
@@ -221,7 +222,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
         # if dtype has an embedded tz, capture it
         tz = dtl.validate_tz_from_dtype(dtype, tz)
 
-        if isinstance(values, DatetimeArrayMixin):
+        if isinstance(values, DatetimeArray):
             # extract nanosecond unix timestamps
             values = values.asi8
         if values.dtype == 'i8':
@@ -295,7 +296,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
 
             if tz is not None and index.tz is None:
                 arr = conversion.tz_localize_to_utc(
-                    ensure_int64(index.values),
+                    ensure_int64(index.asi8),
                     tz, ambiguous=ambiguous)
 
                 index = cls(arr)
@@ -318,7 +319,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
         if not right_closed and len(index) and index[-1] == end:
             index = index[:-1]
 
-        return cls._simple_new(index.values, freq=freq, tz=tz)
+        return cls._simple_new(index.asi8, freq=freq, tz=tz)
 
     # -----------------------------------------------------------------
     # Descriptive Properties
@@ -411,6 +412,38 @@ def __iter__(self):
             for v in converted:
                 yield v
 
+    # ----------------------------------------------------------------
+    # ExtensionArray Interface
+
+    @property
+    def _ndarray_values(self):
+        return self._data
+
+    @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
+    def _validate_fill_value(self, fill_value):
+        if isna(fill_value):
+            return iNaT
+        if not isinstance(fill_value, (datetime, np.datetime64)):
+            raise ValueError("'fill_value' should be a Timestamp. "
+                             "Got '{got}'.".format(got=fill_value))
+
+        # _assert_tzawareness_compat runs before the conversion to i8
+        self._assert_tzawareness_compat(fill_value)
+        return Timestamp(fill_value).value
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        # Inputs are required to agree on both freq and tz.
+        unique_freqs = {arr.freq for arr in to_concat}
+        assert len(unique_freqs) == 1
+        freq = list(unique_freqs)[0]
+        unique_tzs = {arr.tz for arr in to_concat}
+        assert len(unique_tzs) == 1
+        tz = list(unique_tzs)[0]
+
+        i8_parts = [arr.asi8 for arr in to_concat]
+        return cls._simple_new(np.concatenate(i8_parts), freq=freq, tz=tz)
+
     # -----------------------------------------------------------------
     # Comparison Methods
 
@@ -1378,8 +1411,8 @@ def to_julian_date(self):
                  ) / 24.0)
 
 
-DatetimeArrayMixin._add_comparison_ops()
-DatetimeArrayMixin._add_datetimelike_methods()
+DatetimeArray._add_comparison_ops()
+DatetimeArray._add_datetimelike_methods()
 
 
 def _generate_regular_range(cls, start, end, periods, freq):

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -216,14 +216,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         ordinals = libperiod.extract_ordinals(periods, freq)
         return cls(ordinals, freq=freq)
 
-    def _values_for_factorize(self):
-        return self.asi8, iNaT
-
-    @classmethod
-    def _from_factorized(cls, values, original):
-        # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray
-        return cls(values, freq=original.freq)
-
     @classmethod
     def _from_datetime64(cls, data, freq, tz=None):
         """Construct a PeriodArray from a datetime64 array
@@ -262,14 +254,6 @@ def _generate_range(cls, start, end, periods, freq, fields):
 
         return subarr, freq
 
-    @classmethod
-    def _concat_same_type(cls, to_concat):
-        freq = {x.freq for x in to_concat}
-        assert len(freq) == 1
-        freq = list(freq)[0]
-        values = np.concatenate([x._data for x in to_concat])
-        return cls(values, freq=freq)
-
     # --------------------------------------------------------------------
     # Data / Attributes
     @property
@@ -379,22 +363,24 @@ def __setitem__(
             raise TypeError(msg)
         self._data[key] = value
 
+    @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
+    def _validate_fill_value(self, fill_value):
+        if isna(fill_value):
+            return iNaT
+        if not isinstance(fill_value, Period):
+            raise ValueError("'fill_value' should be a Period. "
+                             "Got '{got}'.".format(got=fill_value))
+
+        # A Period fill value must carry the same freq as this array.
+        if fill_value.freq != self.freq:
+            msg = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr,
+                                              fill_value.freqstr)
+            raise IncompatibleFrequency(msg)
+        return fill_value.ordinal
+
     def take(self, indices, allow_fill=False, fill_value=None):
         if allow_fill:
-            if isna(fill_value):
-                fill_value = iNaT
-            elif isinstance(fill_value, Period):
-                if self.freq != fill_value.freq:
-                    msg = DIFFERENT_FREQ_INDEX.format(
-                        self.freq.freqstr,
-                        fill_value.freqstr
-                    )
-                    raise IncompatibleFrequency(msg)
-
-                fill_value = fill_value.ordinal
-            else:
-                msg = "'fill_value' should be a Period. Got '{}'."
-                raise ValueError(msg.format(fill_value))
+            fill_value = self._validate_fill_value(fill_value)
 
         new_values = algos.take(self._data,
                                 indices,
@@ -438,9 +424,6 @@ def fillna(self, value=None, method=None, limit=None):
             new_values = self.copy()
         return new_values
 
-    def copy(self, deep=False):
-        return type(self)(self._data.copy(), freq=self.freq)
-
     def value_counts(self, dropna=False):
         from pandas import Series, PeriodIndex
 
@@ -582,7 +565,7 @@ def to_timestamp(self, freq=None, how='start'):
         -------
         DatetimeArray/Index
         """
-        from pandas.core.arrays import DatetimeArrayMixin
+        from pandas.core.arrays import DatetimeArray
 
         how = libperiod._validate_end_alias(how)
 
@@ -606,7 +589,7 @@ def to_timestamp(self, freq=None, how='start'):
         new_data = self.asfreq(freq, how=how)
 
         new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
-        return DatetimeArrayMixin(new_data, freq='infer')
+        return DatetimeArray(new_data, freq='infer')
 
     # ------------------------------------------------------------------
     # Formatting