Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: Tests and Helpers for Datetime/Period Arrays #23502

Merged
merged 13 commits into from
Nov 9, 2018
Merged
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ class _BaseOffset(object):

def __add__(self, other):
if getattr(other, "_typ", None) in ["datetimeindex", "periodindex",
"datetimearray", "periodarray",
"series", "period", "dataframe"]:
# defer to the other class's implementation
return other + self
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ def astype(self, dtype, copy=True):
# ------------------------------------------------------------------
# Null Handling

def isna(self):
    """
    Return the per-element missing-value indicator for this array.

    This is a thin public wrapper: the result is whatever the ``_isnan``
    property yields, returned unchanged.
    """
    return self._isnan

@property # NB: override with cache_readonly in immutable subclasses
def _isnan(self):
""" return if each value is nan"""
Expand Down
21 changes: 17 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,27 +118,36 @@ def wrapper(self, other):
else:
if isinstance(other, list):
# FIXME: This can break for object-dtype with mixed types
other = type(self)(other)
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
try:
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
other = type(self)(other)
except ValueError:
other = np.array(other, dtype=np.object_)
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries,
DatetimeArrayMixin)):
# Following Timestamp convention, __eq__ is all-False
# and __ne__ is all True, others raise TypeError.
return ops.invalid_comparison(self, other, op)

if is_object_dtype(other):
result = op(self.astype('O'), np.array(other))
o_mask = isna(other)
elif not (is_datetime64_dtype(other) or
is_datetime64tz_dtype(other)):
# e.g. is_timedelta64_dtype(other)
return ops.invalid_comparison(self, other, op)
else:
self._assert_tzawareness_compat(other)
result = meth(self, np.asarray(other))
if not hasattr(other, 'asi8'):
# ndarray, Series
other = type(self)(other)
result = meth(self, other)
o_mask = other._isnan
TomAugspurger marked this conversation as resolved.
Show resolved Hide resolved

result = com.values_from_object(result)

# Make sure to pass an array to result[...]; indexing with
# Series breaks with older version of numpy
o_mask = np.array(isna(other))
o_mask = np.array(o_mask)
if o_mask.any():
result[o_mask] = nat_result

Expand All @@ -157,6 +166,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin):
_freq
_data
"""
_typ = "datetimearray"
_bool_ops = ['is_month_start', 'is_month_end',
'is_quarter_start', 'is_quarter_end', 'is_year_start',
'is_year_end', 'is_leap_year']
Expand All @@ -166,6 +176,9 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin):
# by returning NotImplemented
timetuple = None

# ensure that operations with numpy arrays defer to our implementation
__array_priority__ = 1000
TomAugspurger marked this conversation as resolved.
Show resolved Hide resolved

# -----------------------------------------------------------------
# Constructors

Expand Down
3 changes: 0 additions & 3 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,6 @@ def take(self, indices, allow_fill=False, fill_value=None):

return type(self)(new_values, self.freq)

def isna(self):
return self._data == iNaT

def fillna(self, value=None, method=None, limit=None):
# TODO(#20300)
# To avoid converting to object, we re-implement here with the changes
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def wrapper(self, other):


class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin):
_typ = "timedeltaarray"

@property
def _box_func(self):
return lambda x: Timedelta(x, unit='ns')
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/dtypes/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def _check(cls, inst):
('sparse_array', 'sparse_series'))
# NOTE: the last argument must be a real tuple.  ``("categorical")`` is just
# the string "categorical" (parentheses alone do not make a tuple), so any
# membership test performed against it would silently become a substring
# test.  Use 1-tuples with a trailing comma, consistent with the
# ABCPeriodArray / ABCPeriod declarations that follow.
ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ",
                                        ("categorical", ))
ABCDatetimeArray = create_pandas_abc_type("ABCDatetimeArray", "_typ",
                                          ("datetimearray", ))
ABCTimedeltaArray = create_pandas_abc_type("ABCTimedeltaArray", "_typ",
                                           ("timedeltaarray", ))
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
ABCPeriodArray = create_pandas_abc_type("ABCPeriodArray", "_typ",
("periodarray", ))
ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", ))
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/arithmetic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd

from pandas.compat import long
from pandas.core.arrays import PeriodArray, DatetimeArrayMixin as DatetimeArray


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
Expand Down Expand Up @@ -171,3 +172,21 @@ def box_df_broadcast_failure(request):
the DataFrame operation tries to broadcast incorrectly.
"""
return request.param


@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, PeriodArray],
                ids=lambda cls: cls.__name__)
def pbox(request):
    """
    PeriodDtype-specific counterpart of `box`.

    Parametrized over pd.Index, pd.Series, pd.DataFrame and PeriodArray so
    that tests using it are also run against PeriodArray.
    """
    box_cls = request.param
    return box_cls


@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, DatetimeArray],
                ids=lambda cls: cls.__name__)
def dbox(request):
    """
    datetime64-specific counterpart of `box`.

    Parametrized over pd.Index, pd.Series, pd.DataFrame and DatetimeArray so
    that tests using it are also run against DatetimeArray.
    """
    box_cls = request.param
    return box_cls
20 changes: 10 additions & 10 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,10 +1037,10 @@ def test_dti_add_sub_float(self, op, other):
with pytest.raises(TypeError):
op(dti, other)

def test_dti_add_timestamp_raises(self, box):
def test_dti_add_timestamp_raises(self, dbox):
# GH#22163 ensure DataFrame doesn't cast Timestamp to i8
idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
idx = tm.box_expected(idx, box)
idx = tm.box_expected(idx, dbox)
msg = "cannot add"
with tm.assert_raises_regex(TypeError, msg):
idx + Timestamp('2011-01-01')
Expand Down Expand Up @@ -1152,16 +1152,16 @@ def test_dti_add_intarray_no_freq(self, box):
# -------------------------------------------------------------
# Binary operations DatetimeIndex and timedelta-like

def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours, box):
def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours, dbox):
# GH#22005, GH#22163 check DataFrame doesn't raise TypeError
tz = tz_naive_fixture
rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
rng = tm.box_expected(rng, box)
rng = tm.box_expected(rng, dbox)

result = rng + two_hours
expected = pd.date_range('2000-01-01 02:00',
'2000-02-01 02:00', tz=tz)
expected = tm.box_expected(expected, box)
expected = tm.box_expected(expected, dbox)
tm.assert_equal(result, expected)

def test_dti_iadd_timedeltalike(self, tz_naive_fixture, two_hours):
Expand Down Expand Up @@ -1412,13 +1412,13 @@ def test_sub_dti_dti(self):
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('freq', [None, 'D'])
def test_sub_period(self, freq, box):
def test_sub_period(self, freq, dbox):
# GH#13078
# not supported, check TypeError
p = pd.Period('2011-01-01', freq='D')

idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq)
idx = tm.box_expected(idx, box)
idx = tm.box_expected(idx, dbox)

with pytest.raises(TypeError):
idx - p
Expand Down Expand Up @@ -1760,7 +1760,7 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names):
res3 = dti - other
tm.assert_series_equal(res3, expected_sub)

def test_dti_add_offset_tzaware(self, tz_aware_fixture, box):
def test_dti_add_offset_tzaware(self, tz_aware_fixture, dbox):
# GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
timezone = tz_aware_fixture
if timezone == 'US/Pacific':
Expand All @@ -1773,8 +1773,8 @@ def test_dti_add_offset_tzaware(self, tz_aware_fixture, box):
expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00',
'2010-11-01 07:00'], freq='H', tz=timezone)

dates = tm.box_expected(dates, box)
expected = tm.box_expected(expected, box)
dates = tm.box_expected(dates, dbox)
expected = tm.box_expected(expected, dbox)

# TODO: parametrize over the scalar being added? radd? sub?
offset = dates + pd.offsets.Hour(5)
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,15 +579,15 @@ def test_pi_add_offset_n_gt1(self, box):
result = per.freq + pi
tm.assert_equal(result, expected)

def test_pi_add_offset_n_gt1_not_divisible(self, box):
def test_pi_add_offset_n_gt1_not_divisible(self, pbox):
# GH#23215
# PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0

pi = pd.PeriodIndex(['2016-01'], freq='2M')
pi = tm.box_expected(pi, box)
pi = tm.box_expected(pi, pbox)

expected = pd.PeriodIndex(['2016-04'], freq='2M')
expected = tm.box_expected(expected, box)
expected = tm.box_expected(expected, pbox)

result = pi + to_offset('3M')
tm.assert_equal(result, expected)
Expand Down Expand Up @@ -883,10 +883,10 @@ def test_pi_ops(self):
tm.assert_index_equal(result, exp)

@pytest.mark.parametrize('ng', ["str", 1.5])
def test_pi_ops_errors(self, ng, box):
def test_pi_ops_errors(self, ng, pbox):
idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
freq='M', name='idx')
obj = tm.box_expected(idx, box)
obj = tm.box_expected(idx, pbox)

msg = r"unsupported operand type\(s\)"
with tm.assert_raises_regex(TypeError, msg):
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Tests for DatetimeArray
"""
import operator

import numpy as np

import pandas as pd
from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
import pandas.util.testing as tm


class TestDatetimeArrayComparisons(object):
    # TODO: merge this into tests/arithmetic/test_datetime64 once it is
    # sufficiently robust

    def test_cmp_dt64_arraylike_tznaive(self):
        """
        Compare a tz-naive DatetimeArray against itself, against the
        DatetimeIndex it was built from, and against that index converted
        to an ndarray, in both operand orders.
        """
        # arbitrary tz-naive DatetimeIndex
        dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=None)
        arr = DatetimeArray(dti)
        assert arr.freq == dti.freq
        assert arr.tz == dti.tz

        right = dti

        def check_all(ops, expected):
            # Every op must give `expected` for arr-vs-arr and for
            # arr-vs-other with `other` on either side.
            for op in ops:
                result = op(arr, arr)
                tm.assert_numpy_array_equal(result, expected)
                # TODO: add list and tuple, and object-dtype once those
                # are fixed in the constructor
                for other in [right, np.array(right)]:
                    result = op(arr, other)
                    tm.assert_numpy_array_equal(result, expected)

                    result = op(other, arr)
                    tm.assert_numpy_array_equal(result, expected)

        # ==, <=, >= : identical values, so all True
        all_true = np.ones(len(arr), dtype=bool)
        check_all([operator.eq, operator.le, operator.ge], all_true)

        # !=, <, > : identical values, so all False
        all_false = np.zeros(len(dti), dtype=bool)
        tm.assert_numpy_array_equal(arr != arr, all_false)
        check_all([operator.ne, operator.lt, operator.gt], all_false)
23 changes: 22 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
IntervalIndex, MultiIndex, Panel, PeriodIndex, RangeIndex, Series,
TimedeltaIndex, bdate_range)
from pandas.core.algorithms import take_1d
from pandas.core.arrays import ExtensionArray, IntervalArray, PeriodArray
from pandas.core.arrays import (
ExtensionArray, IntervalArray, PeriodArray, period_array,
DatetimeArrayMixin as DatetimeArray)
import pandas.core.common as com

from pandas.io.common import urlopen
Expand Down Expand Up @@ -1049,6 +1051,18 @@ def assert_period_array_equal(left, right, obj='PeriodArray'):
assert_attr_equal('freq', left, right, obj=obj)


def assert_datetime_array_equal(left, right, obj='DatetimeArray'):
_check_isinstance(left, right, DatetimeArray)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this needed? why don’t we just have a more generic assert_array_equal

adding more comparison routines generally is just asking for trouble - we already have too many

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we may be able to de-duplicate these once DTA/TDA are EAs, but for now the options are to have this function or to implement something equivalent inside tm.assert_equal

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is fine to reconsider this once all internal ExtensionArrays are in place (I think the same was said when adding the assert_period_array_equal that is just above this one)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adding more comparison routines generally is just asking for trouble - we already have too many

I really like having separate ones, so that you can assert that you aren't accidentally comparing two of the wrong kind of object. If you just have a generic assert_array_equal, you could have built two ndarrays by accident and there's no way to check that the type is correct (unless you pass that as a parameter?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again, not sure why this is; the point is we are comparing versus an expected value
so this is just extra verbose

e.g. we recently consolidated into assert_equal to avoid this exact problem
now adding these back is just more code

sure it’s slightly more explicit but IMHO is not worth the extra functions and maintenance over time

Copy link
Contributor

@TomAugspurger TomAugspurger Nov 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comparing versus an expected value

sometimes though, that expected value isn't constructed explicitly. Sometimes it's the result of some computation (think of the tests in ops for example).

e.g. we recently consolidated into assert_equal to avoid this exact problem
now adding these back is just more code

assert_equal just dispatches to each of the specialized asserts. The code has to live somewhere :) But, I don't mean to nitpick over this. I would just rather type

tm.assert_period_array_equal(a, b)

than

tm.assert_array_equal(a, b, kind='period')

or, worse, tm.assert_array_equal(a, b), because I'm lazy, and accidentally not have a period array.

So that's my case, but I'm more than happy to be outvoted here :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it’s fine

just want to reduce maintenance burden


assert_numpy_array_equal(left._data, right._data,
obj='{obj}._data'.format(obj=obj))
assert_attr_equal('freq', left, right, obj=obj)
assert_attr_equal('tz', left, right, obj=obj)

# Check that == works as expected
assert ((left == right) | (left._isnan & right._isnan)).all()
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved


def raise_assert_detail(obj, message, left, right, diff=None):
__tracebackhide__ = True

Expand Down Expand Up @@ -1546,6 +1560,8 @@ def assert_equal(left, right, **kwargs):
assert_interval_array_equal(left, right, **kwargs)
elif isinstance(left, PeriodArray):
assert_period_array_equal(left, right, **kwargs)
elif isinstance(left, DatetimeArray):
assert_datetime_array_equal(left, right, **kwargs)
elif isinstance(left, ExtensionArray):
assert_extension_array_equal(left, right, **kwargs)
elif isinstance(left, np.ndarray):
Expand Down Expand Up @@ -1573,6 +1589,11 @@ def box_expected(expected, box_cls):
expected = pd.Series(expected)
elif box_cls is pd.DataFrame:
expected = pd.Series(expected).to_frame()
elif box_cls is PeriodArray:
# the PeriodArray constructor is not as flexible as period_array
expected = period_array(expected)
elif box_cls is DatetimeArray:
expected = DatetimeArray(expected)
elif box_cls is np.ndarray:
expected = np.array(expected)
else:
Expand Down