Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: DatetimeArray+TimedeltaArray #23415

Closed
wants to merge 10 commits into from
2 changes: 1 addition & 1 deletion pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ExtensionOpsMixin,
ExtensionScalarOpsMixin)
from .categorical import Categorical # noqa
from .datetimes import DatetimeArrayMixin # noqa
from .datetimes import DatetimeArray # noqa
from .interval import IntervalArray # noqa
from .period import PeriodArray, period_array # noqa
from .timedeltas import TimedeltaArrayMixin # noqa
Expand Down
120 changes: 115 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pandas.core.dtypes.missing import isna

import pandas.core.common as com
from pandas.core.algorithms import checked_add_with_arr
from pandas.core.algorithms import checked_add_with_arr, take

from .base import ExtensionOpsMixin
from pandas.util._decorators import deprecate_kwarg
Expand Down Expand Up @@ -122,16 +122,22 @@ def __iter__(self):
@property
def values(self):
""" return the underlying data as an ndarray """
return self._data.view(np.ndarray)
if is_timedelta64_dtype(self):
return self._data.view(np.ndarray)
return self._data

@property
def asi8(self):
# do not cache or you'll create a memory leak
return self.values.view('i8')
return self._data.view('i8')

# ------------------------------------------------------------------
# Array-like Methods

@property
def nbytes(self):
    """
    Number of bytes reported by the ``asi8`` view of this array.
    """
    i8_values = self.asi8
    return i8_values.nbytes

@property
def shape(self):
    """
    One-element tuple containing ``len(self)``.
    """
    length = len(self)
    return (length,)
Expand Down Expand Up @@ -197,9 +203,113 @@ def astype(self, dtype, copy=True):
return self._box_values(self.asi8)
return super(DatetimeLikeArrayMixin, self).astype(dtype, copy)

# ------------------------------------------------------------------
# ExtensionArray Interface
jreback marked this conversation as resolved.
Show resolved Hide resolved
# isna
# __getitem__
# __len__
# nbytes
# take
# _concat_same_type
# copy
# _from_factorized
# factorize / _values_for_factorize
# _from_sequence
# unique
#
# dtype
#
# dropna
#
#* _formatting_values
#* fillna
#* argsort / _values_for_argsort
#* _reduce

def unique(self):
    """
    Return the distinct values of this array.

    De-duplication is performed on the ``asi8`` values with
    ``unique1d``; the result is re-wrapped via ``_shallow_copy``.
    """
    # Imported locally, as in the original implementation.
    from pandas.core.algorithms import unique1d

    distinct_i8 = unique1d(self.asi8)
    return self._shallow_copy(distinct_i8)

def _validate_fill_value(self, fill_value):
    """
    If a fill_value is passed to `take` convert it to an i8 representation,
    raising ValueError if this is not possible.

    Parameters
    ----------
    fill_value : object

    Returns
    -------
    fill_value : np.int64

    Raises
    ------
    ValueError
    """
    # No conversion is implemented at this level: the call always raises
    # AbstractMethodError, so a concrete class has to supply its own version.
    raise AbstractMethodError(self)

def take(self, indices, allow_fill=False, fill_value=None):
    """
    Select elements by position and return them re-wrapped.

    Parameters
    ----------
    indices : sequence of int
        Positions forwarded to the module-level ``take``.
    allow_fill : bool, default False
        When true, ``fill_value`` is first converted with
        ``_validate_fill_value``.
    fill_value : object, optional
        Forwarded unchanged when ``allow_fill`` is false.

    Returns
    -------
    The result of ``self._shallow_copy`` on the taken i8 values.
    """
    if allow_fill:
        fill_value = self._validate_fill_value(fill_value)

    taken_i8 = take(self.asi8, indices,
                    allow_fill=allow_fill, fill_value=fill_value)

    # TODO: use "infer"?  Why does not passing freq cause
    # failures in py37 but not py27?
    if is_period_dtype(self):
        freq = self.freq
    else:
        freq = None
    return self._shallow_copy(taken_i8, freq=freq)

@classmethod
def _concat_same_type(cls, to_concat):
    """
    Concatenate arrays that all carry the same ``freq``.

    Parameters
    ----------
    to_concat : sequence
        Objects exposing ``freq`` and ``asi8``.

    Returns
    -------
    The result of ``cls._simple_new`` on the concatenated i8 values.
    """
    # for TimedeltaArray and PeriodArray; DatetimeArray overrides
    distinct_freqs = set()
    for arr in to_concat:
        distinct_freqs.add(arr.freq)
    # Exactly one distinct freq is required across all inputs.
    assert len(distinct_freqs) == 1
    shared_freq = list(distinct_freqs)[0]

    i8_chunks = [arr.asi8 for arr in to_concat]
    joined = np.concatenate(i8_chunks)
    return cls._simple_new(joined, freq=shared_freq)

def copy(self, deep=False):
# TODO: should `deep` determine whether we copy self.asi8?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes like

values = self.asi8
if deep:
    values = values.copy()
....

if is_datetime64tz_dtype(self):
return type(self)(self.asi8.copy(), tz=self.tz, freq=self.freq)
return type(self)(self.asi8.copy(), freq=self.freq)

# Following how PeriodArray does this
# TODO: ignoring `type`?
def view(self, dtype=None, type=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this so complicated? do we really need this method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I implemented this because tests were raising AttributeErrors asking for it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we really can't support .view generally with EA. Maybe just leave this on Index.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really this PR is still sufficiently early in the "WIP" phase it isn't worth spending much time on ATM.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok sure

if dtype is None or dtype is __builtins__['type'](self):
return self
return self._ndarray_values.view(dtype=dtype)

def _values_for_factorize(self):
    """
    Return the pair ``(asi8 values, iNaT)`` used as input to factorize.
    """
    i8_values = self.asi8
    na_sentinel = iNaT
    return i8_values, na_sentinel

@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an instance from factorized ``values``.

    ``freq`` is always copied from ``original``; ``tz`` is passed as well
    when ``original`` has a tz-aware datetime64 dtype.
    """
    ctor_kwargs = {'freq': original.freq}
    if is_datetime64tz_dtype(original):
        ctor_kwargs['tz'] = original.tz
    return cls(values, **ctor_kwargs)

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """
    Build an instance from a sequence of scalars.

    Parameters
    ----------
    scalars : sequence
        Converted to an object-dtype ndarray before construction.
    dtype : optional
        Forwarded to the constructor.
    copy : bool, default False
        When true, the object array is copied before being passed on.
    """
    boxed = np.asarray(scalars, dtype=object)
    payload = boxed.copy() if copy else boxed

    # If necessary this will infer tz from dtype
    return cls(payload, dtype=dtype)

# ------------------------------------------------------------------
# Null Handling

def isna(self):
    """
    Return the missing-value indicator exposed by ``_isnan``.
    """
    mask = self._isnan
    return mask

@property # NB: override with cache_readonly in immutable subclasses
def _isnan(self):
""" return if each value is nan"""
Expand Down Expand Up @@ -738,8 +848,8 @@ def __rsub__(self, other):
# we need to wrap in DatetimeArray/Index and flip the operation
if not isinstance(other, DatetimeLikeArrayMixin):
# Avoid down-casting DatetimeIndex
from pandas.core.arrays import DatetimeArrayMixin
other = DatetimeArrayMixin(other)
from pandas.core.arrays import DatetimeArray
other = DatetimeArray(other)
return other - self
elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and
not is_datetime64_any_dtype(other)):
Expand Down
53 changes: 43 additions & 10 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
conversion, fields, timezones,
resolution as libresolution)

from pandas.util._decorators import cache_readonly
from pandas.util._decorators import cache_readonly, Appender
from pandas.errors import PerformanceWarning
from pandas import compat

Expand All @@ -34,6 +34,7 @@
from pandas.tseries.offsets import Tick, generate_range

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.base import ExtensionArray


_midnight = time(0, 0)
Expand Down Expand Up @@ -119,7 +120,7 @@ def wrapper(self, other):
if isinstance(other, list):
# FIXME: This can break for object-dtype with mixed types
other = type(self)(other)
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, DatetimeArray)):
# Following Timestamp convention, __eq__ is all-False
# and __ne__ is all True, others raise TypeError.
return ops.invalid_comparison(self, other, op)
Expand All @@ -132,7 +133,7 @@ def wrapper(self, other):
return ops.invalid_comparison(self, other, op)
else:
self._assert_tzawareness_compat(other)
result = meth(self, np.asarray(other))
result = meth(self, type(self)(other))#np.asarray(other))
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

result = com.values_from_object(result)

Expand All @@ -150,7 +151,7 @@ def wrapper(self, other):
return compat.set_function_name(wrapper, opname, cls)


class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin):
class DatetimeArray(dtl.DatetimeLikeArrayMixin, ExtensionArray):
"""
Assumes that subclass __new__/__init__ defines:
tz
Expand Down Expand Up @@ -209,7 +210,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if isinstance(values, DatetimeArrayMixin):
if isinstance(values, DatetimeArray):
# extract nanosecond unix timestamps
values = values.asi8
if values.dtype == 'i8':
Expand Down Expand Up @@ -283,7 +284,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,

if tz is not None and index.tz is None:
arr = conversion.tz_localize_to_utc(
ensure_int64(index.values),
ensure_int64(index.asi8),
tz, ambiguous=ambiguous)

index = cls(arr)
Expand All @@ -306,7 +307,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
if not right_closed and len(index) and index[-1] == end:
index = index[:-1]

return cls._simple_new(index.values, freq=freq, tz=tz)
return cls._simple_new(index.asi8, freq=freq, tz=tz)

# -----------------------------------------------------------------
# Descriptive Properties
Expand Down Expand Up @@ -399,6 +400,38 @@ def __iter__(self):
for v in converted:
yield v

# ----------------------------------------------------------------
# ExtensionArray Interface

@property
def _ndarray_values(self):
    """
    Return the ``_data`` attribute unchanged.
    """
    backing = self._data
    return backing

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
    # The docstring is supplied by the Appender decorator above, so none
    # is written here.

    # Missing values map to the iNaT sentinel.
    if isna(fill_value):
        return iNaT

    # Anything that is not a datetime-like scalar is rejected.
    if not isinstance(fill_value, (datetime, np.datetime64)):
        raise ValueError("'fill_value' should be a Timestamp. "
                         "Got '{got}'.".format(got=fill_value))

    # The tz-awareness check runs before the scalar is converted.
    self._assert_tzawareness_compat(fill_value)
    return Timestamp(fill_value).value

@classmethod
def _concat_same_type(cls, to_concat):
freqs = {x.freq for x in to_concat}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you do this freq dance a few times, maybe a helper function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take a look.

assert len(freqs) == 1
freq = list(freqs)[0]

tzs = {x.tz for x in to_concat}
assert len(tzs) == 1
tz = list(tzs)[0]

values = np.concatenate([x.asi8 for x in to_concat])
return cls._simple_new(values, freq=freq, tz=tz)

# -----------------------------------------------------------------
# Comparison Methods

Expand Down Expand Up @@ -874,7 +907,7 @@ def to_period(self, freq=None):

freq = get_period_alias(freq)

return PeriodArray._from_datetime64(self.values, freq, tz=self.tz)
return PeriodArray._from_datetime64(self.asi8, freq, tz=self.tz)

def to_perioddelta(self, freq):
"""
Expand Down Expand Up @@ -1366,8 +1399,8 @@ def to_julian_date(self):
) / 24.0)


DatetimeArrayMixin._add_comparison_ops()
DatetimeArrayMixin._add_datetimelike_methods()
DatetimeArray._add_comparison_ops()
DatetimeArray._add_datetimelike_methods()


def _generate_regular_range(cls, start, end, periods, freq):
Expand Down
Loading