Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: DatetimeArray+TimedeltaArray #23415

Closed
wants to merge 10 commits into from
2 changes: 1 addition & 1 deletion pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ExtensionOpsMixin,
ExtensionScalarOpsMixin)
from .categorical import Categorical # noqa
from .datetimes import DatetimeArrayMixin # noqa
from .datetimes import DatetimeArray # noqa
from .interval import IntervalArray # noqa
from .period import PeriodArray, period_array # noqa
from .timedeltas import TimedeltaArrayMixin # noqa
Expand Down
111 changes: 108 additions & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pandas.core.dtypes.missing import isna

import pandas.core.common as com
from pandas.core.algorithms import checked_add_with_arr
from pandas.core.algorithms import checked_add_with_arr, take

from .base import ExtensionOpsMixin
from pandas.util._decorators import deprecate_kwarg
Expand Down Expand Up @@ -127,6 +127,10 @@ def asi8(self):
# ------------------------------------------------------------------
# Array-like Methods

@property
def nbytes(self):
    """
    Number of bytes occupied by the i8 values exposed through ``asi8``.
    """
    i8_values = self.asi8
    return i8_values.nbytes

@property
def shape(self):
    """
    One-element tuple containing ``len(self)``.
    """
    length = len(self)
    return (length,)
Expand Down Expand Up @@ -192,6 +196,107 @@ def astype(self, dtype, copy=True):
return self._box_values(self.asi8)
return super(DatetimeLikeArrayMixin, self).astype(dtype, copy)

# ------------------------------------------------------------------
# ExtensionArray Interface
jreback marked this conversation as resolved.
Show resolved Hide resolved
# isna
# __getitem__
# __len__
# nbytes
# take
# _concat_same_type
# copy
# _from_factorized
# factorize / _values_for_factorize
# _from_sequence
# unique
#
# dtype
#
# dropna
#
#* _formatting_values
#* fillna
#* argsort / _values_for_argsort
#* _reduce

def unique(self):
    """
    Return the distinct values of this array.

    De-duplication runs on the i8 values (``self.asi8``) through
    ``unique1d``; the result is wrapped with ``self._shallow_copy``.
    """
    from pandas.core import algorithms

    distinct_i8 = algorithms.unique1d(self.asi8)
    return self._shallow_copy(distinct_i8)

def _validate_fill_value(self, fill_value):
    """
    Convert a `fill_value` given to `take` into its i8 representation.

    This base implementation is abstract and always raises
    AbstractMethodError; concrete array classes supply the conversion.

    Parameters
    ----------
    fill_value : object

    Returns
    -------
    fill_value : np.int64

    Raises
    ------
    ValueError
        If `fill_value` cannot be converted.
    """
    raise AbstractMethodError(self)

def take(self, indices, allow_fill=False, fill_value=None):
    """
    Take elements from the array by position.

    Parameters
    ----------
    indices : sequence of int
        Positions to take, forwarded to the module-level ``take`` helper.
    allow_fill : bool, default False
        Forwarded to the ``take`` helper. When True, `fill_value` is first
        converted with ``self._validate_fill_value``.
    fill_value : object, optional
        Only validated when `allow_fill` is True.

    Returns
    -------
    Result of ``self._shallow_copy`` on the taken i8 values.
    """
    if allow_fill:
        fill_value = self._validate_fill_value(fill_value)

    taken_i8 = take(self.asi8, indices,
                    allow_fill=allow_fill, fill_value=fill_value)

    # TODO: use "infer"? Why does not passing freq cause
    # failures in py37 but not py27?
    # Only period-dtype data carries its freq over; everything else gets None.
    if is_period_dtype(self):
        freq = self.freq
    else:
        freq = None
    return self._shallow_copy(taken_i8, freq=freq)

@classmethod
def _concat_same_type(cls, to_concat):
    """
    Concatenate several arrays that share a single freq.

    Parameters
    ----------
    to_concat : sequence of arrays exposing ``freq`` and ``asi8``

    Returns
    -------
    Result of ``cls._simple_new`` on the concatenated i8 values.
    """
    # for TimedeltaArray and PeriodArray; DatetimeArray overrides
    distinct_freqs = {arr.freq for arr in to_concat}
    assert len(distinct_freqs) == 1
    freq = tuple(distinct_freqs)[0]

    i8_chunks = [arr.asi8 for arr in to_concat]
    return cls._simple_new(np.concatenate(i8_chunks), freq=freq)

def copy(self, deep=False):
# Build a new instance of type(self) from a copy of self.asi8, passing
# along freq (and tz when the data is tz-aware).
# TODO: should `deep` determine whether we copy self.asi8?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes like

values = self.asi8
if deep:
    values = values.copy()
....

# NOTE(review): `deep` is never read below -- both branches call
# self.asi8.copy() unconditionally.
if is_datetime64tz_dtype(self):
return type(self)(self.asi8.copy(), tz=self.tz, freq=self.freq)
return type(self)(self.asi8.copy(), freq=self.freq)

# Following how PeriodArray does this
# TODO: ignoring `type`?
def view(self, dtype=None, type=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this so complicated? do we really need this method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I implemented this because tests were raising AttributeErrors asking for it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah we really can' support .view generally with EA. maybe just leave this on Index.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really this PR is still sufficiently early in the "WIP" phase it isn't worth spending much time on ATM.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok sure

# Return self unchanged when no dtype is given or when dtype is this
# object's own class; otherwise return a view of self._ndarray_values
# with the requested dtype.
# NOTE(review): the `type` parameter shadows the builtin, so the builtin is
# reached through __builtins__ here. Subscripting __builtins__ only works
# where it is a dict; CPython documents that it may instead be the builtins
# module itself -- confirm, and consider `self.__class__` instead.
# NOTE(review): the `type` argument is never read in this body.
if dtype is None or dtype is __builtins__['type'](self):
return self
return self._ndarray_values.view(dtype=dtype)

def _values_for_factorize(self):
    """
    Return the pair ``(self.asi8, iNaT)``: the i8 values to factorize and
    the sentinel paired with them.
    """
    i8_values = self.asi8
    return i8_values, iNaT

@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an array from factorized `values`, reusing attributes of
    `original`.

    ``original.freq`` is always forwarded; ``original.tz`` is forwarded
    only when `original` has a tz-aware datetime64 dtype.
    """
    if not is_datetime64tz_dtype(original):
        return cls(values, freq=original.freq)
    return cls(values, tz=original.tz, freq=original.freq)

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """
    Construct an array from a sequence of scalars.

    Parameters
    ----------
    scalars : sequence
        Converted to an object-dtype ndarray before being passed to `cls`.
    dtype : object, optional
        Forwarded unchanged to `cls`.
    copy : bool, default False
        When True, the object ndarray is copied before construction.
    """
    as_objects = np.asarray(scalars, dtype=object)
    as_objects = as_objects.copy() if copy else as_objects

    # If necessary this will infer tz from dtype
    return cls(as_objects, dtype=dtype)

# ------------------------------------------------------------------
# Null Handling

Expand Down Expand Up @@ -736,8 +841,8 @@ def __rsub__(self, other):
# we need to wrap in DatetimeArray/Index and flip the operation
if not isinstance(other, DatetimeLikeArrayMixin):
# Avoid down-casting DatetimeIndex
from pandas.core.arrays import DatetimeArrayMixin
other = DatetimeArrayMixin(other)
from pandas.core.arrays import DatetimeArray
other = DatetimeArray(other)
return other - self
elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and
not is_datetime64_any_dtype(other)):
Expand Down
49 changes: 41 additions & 8 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
conversion, fields, timezones,
resolution as libresolution)

from pandas.util._decorators import cache_readonly
from pandas.util._decorators import cache_readonly, Appender
from pandas.errors import PerformanceWarning
from pandas import compat

Expand All @@ -34,6 +34,7 @@
from pandas.tseries.offsets import Tick, generate_range

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.base import ExtensionArray


_midnight = time(0, 0)
Expand Down Expand Up @@ -122,7 +123,7 @@ def wrapper(self, other):
except ValueError:
other = np.array(other, dtype=np.object_)
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries,
DatetimeArrayMixin)):
DatetimeArray)):
# Following Timestamp convention, __eq__ is all-False
# and __ne__ is all True, others raise TypeError.
return ops.invalid_comparison(self, other, op)
Expand Down Expand Up @@ -158,7 +159,7 @@ def wrapper(self, other):
return compat.set_function_name(wrapper, opname, cls)


class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin):
class DatetimeArray(dtl.DatetimeLikeArrayMixin, ExtensionArray):
"""
Assumes that subclass __new__/__init__ defines:
tz
Expand Down Expand Up @@ -221,7 +222,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if isinstance(values, DatetimeArrayMixin):
if isinstance(values, DatetimeArray):
# extract nanosecond unix timestamps
values = values.asi8
if values.dtype == 'i8':
Expand Down Expand Up @@ -295,7 +296,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,

if tz is not None and index.tz is None:
arr = conversion.tz_localize_to_utc(
ensure_int64(index.values),
ensure_int64(index.asi8),
tz, ambiguous=ambiguous)

index = cls(arr)
Expand All @@ -318,7 +319,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
if not right_closed and len(index) and index[-1] == end:
index = index[:-1]

return cls._simple_new(index.values, freq=freq, tz=tz)
return cls._simple_new(index.asi8, freq=freq, tz=tz)

# -----------------------------------------------------------------
# Descriptive Properties
Expand Down Expand Up @@ -411,6 +412,38 @@ def __iter__(self):
for v in converted:
yield v

# ----------------------------------------------------------------
# ExtensionArray Interface

@property
def _ndarray_values(self):
    """
    The ``_data`` attribute of this array, returned as-is.
    """
    backing = self._data
    return backing

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
    # NA-like fill values map to the iNaT sentinel.
    if isna(fill_value):
        return iNaT

    # datetime / np.datetime64 scalars must pass the tz-awareness check
    # before being converted to their integer ``.value``.
    if isinstance(fill_value, (datetime, np.datetime64)):
        self._assert_tzawareness_compat(fill_value)
        return Timestamp(fill_value).value

    # Anything else is rejected.
    raise ValueError("'fill_value' should be a Timestamp. "
                     "Got '{got}'.".format(got=fill_value))

@classmethod
def _concat_same_type(cls, to_concat):
# Concatenate the i8 values of every array in `to_concat` and build the
# result with cls._simple_new, carrying over the shared freq and tz.
freqs = {x.freq for x in to_concat}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you do this freq dance a few times, maybe a helper function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take a look.

# All inputs must agree on exactly one freq (enforced by a bare assert).
assert len(freqs) == 1
freq = list(freqs)[0]

# Likewise, all inputs must agree on exactly one tz.
tzs = {x.tz for x in to_concat}
assert len(tzs) == 1
tz = list(tzs)[0]

values = np.concatenate([x.asi8 for x in to_concat])
return cls._simple_new(values, freq=freq, tz=tz)

# -----------------------------------------------------------------
# Comparison Methods

Expand Down Expand Up @@ -1378,8 +1411,8 @@ def to_julian_date(self):
) / 24.0)


DatetimeArrayMixin._add_comparison_ops()
DatetimeArrayMixin._add_datetimelike_methods()
DatetimeArray._add_comparison_ops()
DatetimeArray._add_datetimelike_methods()


def _generate_regular_range(cls, start, end, periods, freq):
Expand Down
53 changes: 18 additions & 35 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,14 +216,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
ordinals = libperiod.extract_ordinals(periods, freq)
return cls(ordinals, freq=freq)

def _values_for_factorize(self):
return self.asi8, iNaT

@classmethod
def _from_factorized(cls, values, original):
# type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray
return cls(values, freq=original.freq)

@classmethod
def _from_datetime64(cls, data, freq, tz=None):
"""Construct a PeriodArray from a datetime64 array
Expand Down Expand Up @@ -262,14 +254,6 @@ def _generate_range(cls, start, end, periods, freq, fields):

return subarr, freq

@classmethod
def _concat_same_type(cls, to_concat):
freq = {x.freq for x in to_concat}
assert len(freq) == 1
freq = list(freq)[0]
values = np.concatenate([x._data for x in to_concat])
return cls(values, freq=freq)

# --------------------------------------------------------------------
# Data / Attributes
@property
Expand Down Expand Up @@ -379,22 +363,24 @@ def __setitem__(
raise TypeError(msg)
self._data[key] = value

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
    # NA-like fill values map to the iNaT sentinel.
    if isna(fill_value):
        return iNaT

    # Only Period scalars are accepted beyond this point.
    if not isinstance(fill_value, Period):
        raise ValueError("'fill_value' should be a Period. "
                         "Got '{got}'.".format(got=fill_value))

    # The Period's freq has to match this array's freq.
    if fill_value.freq != self.freq:
        msg = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr,
                                          fill_value.freqstr)
        raise IncompatibleFrequency(msg)

    return fill_value.ordinal

def take(self, indices, allow_fill=False, fill_value=None):
if allow_fill:
if isna(fill_value):
fill_value = iNaT
elif isinstance(fill_value, Period):
if self.freq != fill_value.freq:
msg = DIFFERENT_FREQ_INDEX.format(
self.freq.freqstr,
fill_value.freqstr
)
raise IncompatibleFrequency(msg)

fill_value = fill_value.ordinal
else:
msg = "'fill_value' should be a Period. Got '{}'."
raise ValueError(msg.format(fill_value))
fill_value = self._validate_fill_value(fill_value)

new_values = algos.take(self._data,
indices,
Expand Down Expand Up @@ -438,9 +424,6 @@ def fillna(self, value=None, method=None, limit=None):
new_values = self.copy()
return new_values

def copy(self, deep=False):
return type(self)(self._data.copy(), freq=self.freq)

def value_counts(self, dropna=False):
from pandas import Series, PeriodIndex

Expand Down Expand Up @@ -582,7 +565,7 @@ def to_timestamp(self, freq=None, how='start'):
-------
DatetimeArray/Index
"""
from pandas.core.arrays import DatetimeArrayMixin
from pandas.core.arrays import DatetimeArray

how = libperiod._validate_end_alias(how)

Expand All @@ -606,7 +589,7 @@ def to_timestamp(self, freq=None, how='start'):
new_data = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
return DatetimeArrayMixin(new_data, freq='infer')
return DatetimeArray(new_data, freq='infer')

# ------------------------------------------------------------------
# Formatting
Expand Down
Loading