From f25d24c604d06188cb4745b070d51d1cb10c3a38 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:27:56 -0800 Subject: [PATCH 01/18] implement _from_sequence --- pandas/core/arrays/datetimes.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 270e4757df30f..48191907ef031 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -170,6 +170,8 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): # Constructors _attributes = ["freq", "tz"] + _freq = None + _tz = None @classmethod def _simple_new(cls, values, freq=None, tz=None, **kwargs): @@ -194,6 +196,9 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs): return result def __new__(cls, values, freq=None, tz=None, dtype=None): + if isinstance(values, (list, tuple)) or is_object_dtype(values): + values = cls._from_sequence(values) + if tz is None and hasattr(values, 'tz'): # e.g. 
DatetimeIndex tz = values.tz @@ -207,9 +212,14 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if isinstance(values, DatetimeArrayMixin): + if lib.is_scalar(values): + raise ValueError('{cls}() must be called with a ' + 'collection of some kind, {data} was passed' + .format(cls=type(self).__name__, data=repr(data))) + elif isinstance(values, DatetimeArrayMixin): # extract nanosecond unix timestamps values = values.asi8 + if values.dtype == 'i8': values = values.view('M8[ns]') @@ -227,6 +237,18 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): # constructor, this does not call _deepcopy_if_needed return result + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + # list, tuple, or object-dtype ndarray/Index + values = np.array(scalars, dtype=np.object_, copy=copy) + + # TODO: See if we can decrease circularity + from pandas.core.tools.datetimes import to_datetime + values = to_datetime(values) + + # pass dtype to constructor in order to convert timezone if necessary + return cls(values, dtype=dtype) + @classmethod def _generate_range(cls, start, end, periods, freq, tz=None, normalize=False, ambiguous='raise', closed=None): From e47c200fd4fc2b0529301df238b0324ad55291ea Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:28:18 -0800 Subject: [PATCH 02/18] Add dtype to periodArray.__init__ --- pandas/core/arrays/period.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 90e7beac63427..0a64eda9c9551 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -165,7 +165,9 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, freq=None, copy=False): + def __init__(self, values, freq=None, 
dtype=None, copy=False): + freq = dtl.validate_dtype_freq(dtype, freq) + if freq is not None: freq = Period._maybe_convert_freq(freq) From 2cb7597917280cad1252cef9e9d496d53ff9ec1d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:29:07 -0800 Subject: [PATCH 03/18] implement require_m8ns_dtype, from_sequence --- pandas/core/arrays/timedeltas.py | 55 ++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9653121879c0d..e2b5fe81c8ec2 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -11,7 +11,7 @@ from pandas import compat from pandas.core.dtypes.common import ( - _TD_DTYPE, is_list_like) + _TD_DTYPE, is_list_like, is_object_dtype, is_timedelta64_dtype) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna @@ -112,9 +112,7 @@ def dtype(self): @classmethod def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): - # `dtype` is passed by _shallow_copy in corner cases, should always - # be timedelta64[ns] if present - assert dtype == _TD_DTYPE + _require_m8ns_dtype(dtype) assert isinstance(values, np.ndarray), type(values) if values.dtype == 'i8': @@ -127,13 +125,30 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result._freq = freq return result - def __new__(cls, values, freq=None): + def __new__(cls, values, freq=None, dtype=_TD_DTYPE): + _require_m8ns_dtype(dtype) + + if isinstance(values, (list, tuple)) or is_object_dtype(values): + values = cls._from_sequence(values)._data freq, freq_infer = dtl.maybe_infer_freq(freq) + if isinstance(values, TimedeltaArrayMixin): + if freq is None and values.freq is not None: + freq = values.freq + freq_infer = False + values = values._data + values = np.array(values, copy=False) - if values.dtype == np.object_: - values = array_to_timedelta64(values) + + if values.dtype == 'i8': + pass + elif not 
is_timedelta64_dtype(values): + raise TypeError(values.dtype) + elif values.dtype != _TD_DTYPE: + # i.e. non-nano unit + # TODO: use tslibs.conversion func? watch out for overflows + values = values.astype(_TD_DTYPE) result = cls._simple_new(values, freq=freq) if freq_infer: @@ -143,6 +158,14 @@ def __new__(cls, values, freq=None): return result + @classmethod + def _from_sequence(cls, scalars, dtype=_TD_DTYPE, copy=False): + # list, tuple, or object-dtype ndarray/Index + values = np.array(scalars, dtype=np.object_, copy=copy) + + result = array_to_timedelta64(values) + return cls(result, dtype=dtype) + @classmethod def _generate_range(cls, start, end, periods, freq, closed=None): @@ -413,3 +436,21 @@ def _generate_regular_range(start, end, periods, offset): data = np.arange(b, e, stride, dtype=np.int64) return data + + +def _require_m8ns_dtype(dtype): + """ + `dtype` is included in the constructor signature for consistency with + DatetimeArray and PeriodArray, but only timedelta64[ns] is considered + valid. Raise if anything else is passed. 
+ + Parameters + ---------- + dtype : dtype + + Raises + ------ + ValueError + """ + if dtype != _TD_DTYPE: + raise ValueError("Only timedelta64[ns] dtype is valid.", dtype) From a4512b7e84f7a6e892f0b7d868ba45309a644ce6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:29:58 -0800 Subject: [PATCH 04/18] small cleanups in DatetimeIndex.__new__ --- pandas/core/indexes/datetimes.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f87059ba1f017..f45cdea51fc72 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -175,7 +175,6 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, pandas.to_datetime : Convert argument to datetime """ _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget) - _shallow_copy = Index._shallow_copy _typ = 'datetimeindex' _join_precedence = 10 @@ -192,10 +191,11 @@ def _join_i8_wrapper(joinf, **kwargs): _engine_type = libindex.DatetimeEngine - tz = None + _tz = None _freq = None _comparables = ['name', 'freqstr', 'tz'] _attributes = ['name', 'freq', 'tz'] + timetuple = None # define my properties & methods for delegation _bool_ops = ['is_month_start', 'is_month_end', @@ -247,13 +247,13 @@ def __new__(cls, data=None, result.name = name return result + if is_scalar(data): + raise ValueError('{cls}() must be called with a ' + 'collection of some kind, {data} was passed' + .format(cls=cls.__name__, data=repr(data))) + if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): - if is_scalar(data): - raise ValueError('DatetimeIndex() must be called with a ' - 'collection of some kind, %s was passed' - % repr(data)) - # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) data = np.asarray(data, dtype='O') @@ -273,16 +273,15 @@ def __new__(cls, data=None, data = data.tz_localize(tz, ambiguous=ambiguous) else: # 
the tz's must match - if str(tz) != str(data.tz): + if not timezones.tz_compare(tz, data.tz): msg = ('data is already tz-aware {0}, unable to ' 'set specified tz: {1}') raise TypeError(msg.format(data.tz, tz)) - subarr = data.values - if freq is None: freq = data.freq verify_integrity = False + data = data._data elif issubclass(data.dtype.type, np.datetime64): if data.dtype != _NS_DTYPE: data = conversion.ensure_datetime64ns(data) @@ -291,14 +290,13 @@ def __new__(cls, data=None, tz = timezones.maybe_get_tz(tz) data = conversion.tz_localize_to_utc(data.view('i8'), tz, ambiguous=ambiguous) - subarr = data.view(_NS_DTYPE) else: # must be integer dtype otherwise # assume this data are epoch timestamps if data.dtype != _INT64_DTYPE: data = data.astype(np.int64, copy=False) - subarr = data.view(_NS_DTYPE) + subarr = data.view(_NS_DTYPE) assert isinstance(subarr, np.ndarray), type(subarr) assert subarr.dtype == 'M8[ns]', subarr.dtype From a5ef9597702fa5ed6fb058b594a235491497d3b8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:30:41 -0800 Subject: [PATCH 05/18] dispatch parts of TimedeltaIndex.__new__ --- pandas/core/indexes/timedeltas.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 22ecefae8cbe2..aedf3367ff8c7 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -167,16 +167,12 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, elif copy: data = np.array(data, copy=True) - data = np.array(data, copy=False) - if data.dtype == np.object_: - data = array_to_timedelta64(data) - if data.dtype != _TD_DTYPE: - if is_timedelta64_dtype(data): - # non-nano unit - # TODO: watch out for overflows - data = data.astype(_TD_DTYPE) - else: - data = ensure_int64(data).view(_TD_DTYPE) + arr = TimedeltaArrayMixin(data, freq=freq) + if freq_infer and arr.freq is not None: + freq_infer = False + 
verify_integrity = False + freq = arr.freq + data = arr._data assert data.dtype == 'm8[ns]', data.dtype @@ -209,8 +205,6 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): result._reset_identity() return result - _shallow_copy = Index._shallow_copy - @property def _formatter_func(self): from pandas.io.formats.format import _get_format_timedelta64 From 83b04fe970a8a75f310c685e6e06446b0ea54a7e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:35:56 -0800 Subject: [PATCH 06/18] add copy to constructors --- pandas/core/arrays/datetimes.py | 9 +++++---- pandas/core/arrays/timedeltas.py | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 48191907ef031..e21806fe97c8a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -195,9 +195,10 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs): result._tz = timezones.tz_standardize(tz) return result - def __new__(cls, values, freq=None, tz=None, dtype=None): + def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): if isinstance(values, (list, tuple)) or is_object_dtype(values): - values = cls._from_sequence(values) + values = cls._from_sequence(values, copy=copy) + # TODO: Can we set copy=False here to avoid re-coping? if tz is None and hasattr(values, 'tz'): # e.g. 
DatetimeIndex @@ -215,7 +216,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): if lib.is_scalar(values): raise ValueError('{cls}() must be called with a ' 'collection of some kind, {data} was passed' - .format(cls=type(self).__name__, data=repr(data))) + .format(cls=cls.__name__, data=repr(data))) elif isinstance(values, DatetimeArrayMixin): # extract nanosecond unix timestamps values = values.asi8 @@ -225,7 +226,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): assert isinstance(values, np.ndarray), type(values) assert is_datetime64_dtype(values) # not yet assured nanosecond - values = conversion.ensure_datetime64ns(values, copy=False) + values = conversion.ensure_datetime64ns(values, copy=copy) result = cls._simple_new(values, freq=freq, tz=tz) if freq_infer: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e2b5fe81c8ec2..e8ac6fbb8d947 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -125,11 +125,12 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result._freq = freq return result - def __new__(cls, values, freq=None, dtype=_TD_DTYPE): + def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): _require_m8ns_dtype(dtype) if isinstance(values, (list, tuple)) or is_object_dtype(values): - values = cls._from_sequence(values)._data + values = cls._from_sequence(values, copy=copy)._data + # TODO: can we set copy=False to avoid re-copying? 
freq, freq_infer = dtl.maybe_infer_freq(freq) @@ -139,7 +140,7 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE): freq_infer = False values = values._data - values = np.array(values, copy=False) + values = np.array(values, copy=copy) if values.dtype == 'i8': pass From e8abc8307e3d4e2253f06b25e58b7ec58b0085b3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:43:31 -0800 Subject: [PATCH 07/18] small cleanup --- pandas/core/arrays/datetimes.py | 3 ++- pandas/core/indexes/datetimes.py | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e21806fe97c8a..92983f7f5813c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -201,7 +201,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): # TODO: Can we set copy=False here to avoid re-coping? if tz is None and hasattr(values, 'tz'): - # e.g. DatetimeIndex + # e.g. DatetimeArray, DatetimeIndex tz = values.tz if freq is None and hasattr(values, "freq"): @@ -213,6 +213,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) + # TODO: what about ABCSeries? 
if lib.is_scalar(values): raise ValueError('{cls}() must be called with a ' 'collection of some kind, {data} was passed' diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f45cdea51fc72..3aeaf66472353 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -219,6 +219,9 @@ def _join_i8_wrapper(joinf, **kwargs): _timezone = cache_readonly(DatetimeArrayMixin._timezone.fget) is_normalized = cache_readonly(DatetimeArrayMixin.is_normalized.fget) + # -------------------------------------------------------------------- + # Constructors + def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, tz=None, normalize=False, closed=None, ambiguous='raise', @@ -252,7 +255,7 @@ def __new__(cls, data=None, 'collection of some kind, {data} was passed' .format(cls=cls.__name__, data=repr(data))) - if not isinstance(data, (np.ndarray, Index, ABCSeries, + elif not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): if not isinstance(data, (list, tuple)): data = list(data) @@ -318,12 +321,6 @@ def __new__(cls, data=None, return subarr._deepcopy_if_needed(ref_to_data, copy) - def _convert_for_op(self, value): - """ Convert value to be insertable to ndarray """ - if self._has_same_tz(value): - return _to_m8(value) - raise ValueError('Passed item and index have different timezone') - @classmethod def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None, **kwargs): @@ -340,6 +337,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, result._reset_identity() return result + # -------------------------------------------------------------------- + @property def _values(self): # tz-naive -> ndarray @@ -391,6 +390,12 @@ def _is_dates_only(self): from pandas.io.formats.format import _is_dates_only return _is_dates_only(self.values) and self.tz is None + def _convert_for_op(self, value): + """ Convert value to be insertable to ndarray """ + if self._has_same_tz(value): 
+ return _to_m8(value) + raise ValueError('Passed item and index have different timezone') + @property def _formatter_func(self): from pandas.io.formats.format import _get_format_datetime64 From 98dca45aaf030a3a42ab40dbe4714f02dd553e7c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 09:10:06 -0800 Subject: [PATCH 08/18] implement maybe_define_freq --- pandas/core/arrays/datetimelike.py | 16 ++++++++++++++++ pandas/core/arrays/datetimes.py | 7 ++----- pandas/core/arrays/timedeltas.py | 7 +------ pandas/core/indexes/datetimes.py | 8 ++------ pandas/core/indexes/timedeltas.py | 7 +------ 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 95b997fae6b6c..328da9c94b8fd 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -888,6 +888,22 @@ def maybe_infer_freq(freq): return freq, freq_infer +def maybe_define_freq(freq_infer, result): + """ + If appropriate, infer the frequency of the given Datetime/Timedelta Array + and pin it to the object at the end of the construction. 
+ + Parameters + ---------- + freq_infer : bool + result : DatetimeArray or TimedeltaArray + """ + if freq_infer: + inferred = result.inferred_freq + if inferred: + result.freq = frequencies.to_offset(inferred) + + def validate_tz_from_dtype(dtype, tz): """ If the given dtype is a DatetimeTZDtype, extract the implied diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 624cd9a216148..d83e15b6ac28a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -217,7 +217,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): if lib.is_scalar(values): raise ValueError('{cls}() must be called with a ' 'collection of some kind, {data} was passed' - .format(cls=cls.__name__, data=repr(data))) + .format(cls=cls.__name__, data=repr(values))) elif isinstance(values, DatetimeArrayMixin): # extract nanosecond unix timestamps values = values.asi8 @@ -230,10 +230,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): values = conversion.ensure_datetime64ns(values, copy=copy) result = cls._simple_new(values, freq=freq, tz=tz) - if freq_infer: - inferred = result.inferred_freq - if inferred: - result.freq = to_offset(inferred) + dtl.maybe_define_freq(freq_infer, result) # NB: Among other things not yet ported from the DatetimeIndex # constructor, this does not call _deepcopy_if_needed diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e8ac6fbb8d947..a098425fd2c79 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -19,7 +19,6 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.tseries.offsets import Tick -from pandas.tseries.frequencies import to_offset from . 
import datetimelike as dtl @@ -152,11 +151,7 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): values = values.astype(_TD_DTYPE) result = cls._simple_new(values, freq=freq) - if freq_infer: - inferred = result.inferred_freq - if inferred: - result.freq = to_offset(inferred) - + dtl.maybe_define_freq(freq_infer, result) return result @classmethod diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4ebe334441954..83fa60c16c40d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -263,7 +263,7 @@ def __new__(cls, data=None, .format(cls=cls.__name__, data=repr(data))) elif not isinstance(data, (np.ndarray, Index, ABCSeries, - DatetimeArrayMixin)): + DatetimeArrayMixin)): if not isinstance(data, (list, tuple)): data = list(data) data = np.asarray(data, dtype='O') @@ -321,11 +321,7 @@ def __new__(cls, data=None, if freq is not None and not freq_infer: cls._validate_frequency(subarr, freq, ambiguous=ambiguous) - if freq_infer: - inferred = subarr.inferred_freq - if inferred: - subarr.freq = to_offset(inferred) - + dtl.maybe_define_freq(freq_infer, subarr) return subarr._deepcopy_if_needed(ref_to_data, copy) @classmethod diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index aedf3367ff8c7..748f660cc572e 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -35,7 +35,6 @@ to_timedelta, _coerce_scalar_to_timedelta_type) from pandas._libs import (lib, index as libindex, join as libjoin, Timedelta, NaT) -from pandas._libs.tslibs.timedeltas import array_to_timedelta64 class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, @@ -182,11 +181,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, if freq is not None and not freq_infer: cls._validate_frequency(subarr, freq) - if freq_infer: - inferred = subarr.inferred_freq - if inferred: - subarr.freq = to_offset(inferred) - + 
dtl.maybe_define_freq(freq_infer, subarr) return subarr @classmethod From 56fd95e787c1ae01966fd8019aaa36f92cd6d969 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 09:26:43 -0800 Subject: [PATCH 09/18] handle ABCSeries --- pandas/core/arrays/datetimes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d83e15b6ac28a..5175ea1f88aaa 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -213,11 +213,13 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - # TODO: what about ABCSeries? if lib.is_scalar(values): raise ValueError('{cls}() must be called with a ' 'collection of some kind, {data} was passed' .format(cls=cls.__name__, data=repr(values))) + elif isinstance(values, ABCSeries): + # extract nanosecond unix timestamps + values = values._values.asi8 elif isinstance(values, DatetimeArrayMixin): # extract nanosecond unix timestamps values = values.asi8 From 5f92cfa55865dc571dccb5dc8849565d1410e76e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 10:33:46 -0800 Subject: [PATCH 10/18] implement basic constructor tests, fix just enough of the comparisons to pass them --- pandas/core/arrays/datetimes.py | 9 +++- pandas/tests/arrays/test_datetimes.py | 74 +++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/arrays/test_datetimes.py diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 5175ea1f88aaa..55ffc33328500 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -119,7 +119,8 @@ def wrapper(self, other): if isinstance(other, list): # FIXME: This can break for object-dtype with mixed types other = type(self)(other) - elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): + elif 
not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, + DatetimeArrayMixin)): # Following Timestamp convention, __eq__ is all-False # and __ne__ is all True, others raise TypeError. return ops.invalid_comparison(self, other, op) @@ -204,6 +205,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): # e.g. DatetimeArray, DatetimeIndex tz = values.tz + # TODO: what about if freq == 'infer'? if freq is None and hasattr(values, "freq"): # i.e. DatetimeArray, DatetimeIndex freq = values.freq @@ -219,7 +221,10 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): .format(cls=cls.__name__, data=repr(values))) elif isinstance(values, ABCSeries): # extract nanosecond unix timestamps - values = values._values.asi8 + if tz is None: + # TODO: Try to do this in just one place + tz = values.dt.tz + values = np.array(values.view('i8')) elif isinstance(values, DatetimeArrayMixin): # extract nanosecond unix timestamps values = values.asi8 diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py new file mode 100644 index 0000000000000..6c56c68d55e7d --- /dev/null +++ b/pandas/tests/arrays/test_datetimes.py @@ -0,0 +1,74 @@ +""" +Tests for DatetimeArray +""" +import pytest +import numpy as np + +from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray + +import pandas as pd +import pandas.util.testing as tm + + + + +class TestDatetimeArrayConstructors(object): + + def test_init_from_object_dtype(self, tz_naive_fixture): + tz = tz_naive_fixture + if tz is not None: + pytest.xfail(reason="Casting DatetimeIndex to object-dtype raises " + "for pd.Index and is incorrect for np.array; " + "GH#24391") + + # arbitrary DatetimeIndex; this should work for any DatetimeIndex + # with non-None freq + dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=tz) + expected = DatetimeArray(dti) + + # Fails because np.array(dti, dtype=object) incorrectly returns Longs + result = DatetimeArray(np.array(dti, 
dtype=object), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) + + # Fails because `pd.Index(dti, dtype=object) raises incorrectly + result = DatetimeArray(pd.Index(dti, dtype=object), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) + + # NB: for now we re-wrap in DatetimeIndex to use assert_index_equal + # once assert_datetime_array_equal is in place, this will be changed + def test_init_only_freq_infer(self, tz_naive_fixture): + # just pass data and freq='infer' if relevant; no other kwargs + tz = tz_naive_fixture + + # arbitrary DatetimeIndex; this should work for any DatetimeIndex + # with non-None freq + dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=tz) + expected = DatetimeArray(dti) + assert expected.freq == dti.freq + assert expected.tz == dti.tz + + # broken until ABCDatetimeArray and isna is fixed + # assert (dti == expected).all() + # assert (expected == dti).all() + + result = DatetimeArray(expected) + tm.assert_equal(pd.DatetimeIndex(result), dti) + + result = DatetimeArray(list(dti), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) + + result = DatetimeArray(tuple(dti), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) + + if tz is None: + result = DatetimeArray(np.array(dti), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) + + result = DatetimeArray(np.array(dti).astype('M8[s]'), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) + + result = DatetimeArray(pd.Series(dti), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) + + result = DatetimeArray(pd.Series(dti, dtype=object), freq='infer') + tm.assert_equal(pd.DatetimeIndex(result), dti) From 0e1553624feffd1f26206c591cab6501180ec232 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 10:45:29 -0800 Subject: [PATCH 11/18] add note --- pandas/core/arrays/datetimelike.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py 
b/pandas/core/arrays/datetimelike.py index 328da9c94b8fd..b2dc4a8645d99 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -897,6 +897,11 @@ def maybe_define_freq(freq_infer, result): ---------- freq_infer : bool result : DatetimeArray or TimedeltaArray + + Notes + ----- + This may alter `result` in-place, should only ever be called + from __new__/__init__. """ if freq_infer: inferred = result.inferred_freq From 1a015f6803887bf88d9afadecc228daa7ae3fe9c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 10:58:17 -0800 Subject: [PATCH 12/18] flake8 fixup --- pandas/tests/arrays/test_datetimes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 6c56c68d55e7d..9b9659d414040 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -10,8 +10,6 @@ import pandas.util.testing as tm - - class TestDatetimeArrayConstructors(object): def test_init_from_object_dtype(self, tz_naive_fixture): From 5445a5653ba106a02e44c4fa449dbd7a150fe658 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 14:22:55 -0800 Subject: [PATCH 13/18] implement scalar_data_error, with tests --- pandas/core/arrays/datetimelike.py | 6 ++++++ pandas/core/arrays/datetimes.py | 4 +--- pandas/core/indexes/datetimes.py | 4 +--- pandas/tests/arrays/test_datetimes.py | 10 ++++++++++ 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b2dc4a8645d99..4e5699539eee0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -801,6 +801,12 @@ def _evaluate_compare(self, other, op): # ------------------------------------------------------------------- # Shared Constructor Helpers +def scalar_data_error(scalar, cls): + return ('{cls}() must be called with a ' + 'collection of some kind, {data} was passed' + 
.format(cls=cls.__name__, data=repr(scalar))) + + def validate_periods(periods): """ If a `periods` argument is passed to the Datetime/Timedelta Array/Index diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 55ffc33328500..e8ef1c19170c7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -216,9 +216,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): tz = dtl.validate_tz_from_dtype(dtype, tz) if lib.is_scalar(values): - raise ValueError('{cls}() must be called with a ' - 'collection of some kind, {data} was passed' - .format(cls=cls.__name__, data=repr(values))) + raise TypeError(dtl.scalar_data_error(values, cls)) elif isinstance(values, ABCSeries): # extract nanosecond unix timestamps if tz is None: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 83fa60c16c40d..7755c5e90d8aa 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -258,9 +258,7 @@ def __new__(cls, data=None, return result if is_scalar(data): - raise ValueError('{cls}() must be called with a ' - 'collection of some kind, {data} was passed' - .format(cls=cls.__name__, data=repr(data))) + raise TypeError(dtl.scalar_data_error(data, cls)) elif not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 9b9659d414040..bb35a1d325a83 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -12,7 +12,16 @@ class TestDatetimeArrayConstructors(object): + def test_scalar_raises_type_error(self): + # GH#23493 + with pytest.raises(TypeError): + DatetimeArray(2) + + with pytest.raises(TypeError): + pd.DatetimeIndex(pd.Timestamp.now()) + def test_init_from_object_dtype(self, tz_naive_fixture): + # GH#23493 tz = tz_naive_fixture if tz is not None: pytest.xfail(reason="Casting DatetimeIndex to object-dtype 
raises " @@ -35,6 +44,7 @@ def test_init_from_object_dtype(self, tz_naive_fixture): # NB: for now we re-wrap in DatetimeIndex to use assert_index_equal # once assert_datetime_array_equal is in place, this will be changed def test_init_only_freq_infer(self, tz_naive_fixture): + # GH#23493 # just pass data and freq='infer' if relevant; no other kwargs tz = tz_naive_fixture From 272f4b1d55428626c3efb26aa3bc520c155e440c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 14:24:08 -0800 Subject: [PATCH 14/18] docstring --- pandas/core/arrays/datetimelike.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4e5699539eee0..d1bd0f65fea90 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -802,6 +802,19 @@ def _evaluate_compare(self, other, op): # Shared Constructor Helpers def scalar_data_error(scalar, cls): + """ + Produce the error message to issue when raising a TypeError if a scalar + is passed to an array constructor. 
+ + Parameters + ---------- + scalar : object + cls : class + + Returns + ------- + message : str + """ return ('{cls}() must be called with a ' 'collection of some kind, {data} was passed' .format(cls=cls.__name__, data=repr(scalar))) From 35195bd5bcdfd5a52d47934d92032dcbc05c742f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 17:23:52 -0800 Subject: [PATCH 15/18] Fix TimedeltaArray infer_freq; implement tests --- pandas/core/arrays/datetimes.py | 2 + pandas/core/arrays/timedeltas.py | 25 +++++++- pandas/core/indexes/timedeltas.py | 14 +++- pandas/tests/arrays/test_datetimes.py | 16 ++++- pandas/tests/arrays/test_timedeltas.py | 89 ++++++++++++++++++++++++++ pandas/tseries/frequencies.py | 2 +- 6 files changed, 141 insertions(+), 7 deletions(-) create mode 100644 pandas/tests/arrays/test_timedeltas.py diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e8ef1c19170c7..512f7c98aded1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -245,6 +245,8 @@ def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False): def _from_sequence(cls, scalars, dtype=None, copy=False): # list, tuple, or object-dtype ndarray/Index values = np.array(scalars, dtype=np.object_, copy=copy) + if values.ndim != 1: + raise TypeError("Values must be 1-dimensional") # TODO: See if we can decrease circularity from pandas.core.tools.datetimes import to_datetime diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a098425fd2c79..1ce72a5ee8e4e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -3,7 +3,7 @@ import numpy as np -from pandas._libs import tslibs +from pandas._libs import tslibs, lib, algos from pandas._libs.tslibs import Timedelta, Timestamp, NaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import array_to_timedelta64 @@ -133,7 +133,9 @@ def __new__(cls, values, freq=None, 
dtype=_TD_DTYPE, copy=False): freq, freq_infer = dtl.maybe_infer_freq(freq) - if isinstance(values, TimedeltaArrayMixin): + if lib.is_scalar(values): + raise TypeError(dtl.scalar_data_error(values, cls)) + elif isinstance(values, TimedeltaArrayMixin): if freq is None and values.freq is not None: freq = values.freq freq_infer = False @@ -158,6 +160,8 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): def _from_sequence(cls, scalars, dtype=_TD_DTYPE, copy=False): # list, tuple, or object-dtype ndarray/Index values = np.array(scalars, dtype=np.object_, copy=copy) + if values.ndim != 1: + raise TypeError("Values must be 1-dimensional") result = array_to_timedelta64(values) return cls(result, dtype=dtype) @@ -199,6 +203,23 @@ def _generate_range(cls, start, end, periods, freq, closed=None): return cls._simple_new(index, freq=freq) + # ---------------------------------------------------------------- + # Array-Like Methods + # NB: these are appreciably less efficient than the TimedeltaIndex versions + + @property + def is_monotonic_increasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[0] + + @property + def is_monotonic_decreasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[1] + + @property + def is_unique(self): + from pandas.core.algorithms import unique1d + return len(unique1d(self.asi8)) == len(self) + # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 748f660cc572e..32420dfbeeae3 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -156,9 +156,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, data = to_timedelta(data, unit=unit, box=False) if is_scalar(data): - raise ValueError('TimedeltaIndex() must be called with a ' - 'collection of some kind, {data} was passed' - .format(data=repr(data))) + raise dtl.scalar_data_error(data, 
cls) # convert if not already if getattr(data, 'dtype', None) != _TD_DTYPE: @@ -232,6 +230,9 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): nat_rep=na_rep, justify='all').get_result() + # ----------------------------------------------------------------- + # Wrapping TimedeltaArray + days = wrap_field_accessor(TimedeltaArrayMixin.days) seconds = wrap_field_accessor(TimedeltaArrayMixin.seconds) microseconds = wrap_field_accessor(TimedeltaArrayMixin.microseconds) @@ -239,6 +240,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): total_seconds = wrap_array_method(TimedeltaArrayMixin.total_seconds, True) + # override TimedeltaArray versions + is_monotonic_increasing = Index.is_monotonic_increasing + is_monotonic_decreasing = Index.is_monotonic_decreasing + is_unique = Index.is_unique + + # ----------------------------------------------------------------- + @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index bb35a1d325a83..a6fb22f93f5e0 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -20,6 +20,21 @@ def test_scalar_raises_type_error(self): with pytest.raises(TypeError): pd.DatetimeIndex(pd.Timestamp.now()) + def test_from_sequence_requires_1dim(self): + arr2d = np.arange(10).view('M8[s]').astype(object).reshape(2, 5) + with pytest.raises(TypeError): + DatetimeArray(arr2d) + + with pytest.raises(TypeError): + pd.DatetimeIndex(arr2d) + + arr0d = np.array(pd.Timestamp.now()) + with pytest.raises(TypeError): + DatetimeArray(arr0d) + + with pytest.raises(TypeError): + pd.DatetimeIndex(arr0d) + def test_init_from_object_dtype(self, tz_naive_fixture): # GH#23493 tz = tz_naive_fixture @@ -31,7 +46,6 @@ def test_init_from_object_dtype(self, tz_naive_fixture): # arbitrary DatetimeIndex; this should work for any DatetimeIndex # 
with non-None freq dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=tz) - expected = DatetimeArray(dti) # Fails because np.array(dti, dtype=object) incorrectly returns Longs result = DatetimeArray(np.array(dti, dtype=object), freq='infer') diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py new file mode 100644 index 0000000000000..0ec9432e61bbc --- /dev/null +++ b/pandas/tests/arrays/test_timedeltas.py @@ -0,0 +1,89 @@ +""" +Tests for TimedeltaArray +""" + +import numpy as np +import pytest + +from pandas.core.arrays import TimedeltaArrayMixin as TimedeltaArray + +import pandas as pd +import pandas.util.testing as tm + + +# TODO: Many of these tests are mirrored in test_datetimes; see if these +# can be shared +class TestTimedeltaArrayConstructors(object): + def test_scalar_raises_type_error(self): + # GH#23493 + with pytest.raises(TypeError): + TimedeltaArray(2) + + with pytest.raises(TypeError): + pd.TimedeltaIndex(pd.Timedelta(days=4)) + + def test_from_sequence_requires_1dim(self): + arr2d = np.arange(10).view('m8[s]').astype(object).reshape(2, 5) + with pytest.raises(TypeError): + TimedeltaArray(arr2d) + + with pytest.raises(TypeError): + pd.TimedeltaIndex(arr2d) + + arr0d = np.array(pd.Timedelta('49 days')) + with pytest.raises(TypeError): + TimedeltaArray(arr0d) + + with pytest.raises(TypeError): + pd.TimedeltaIndex(arr0d) + + def test_init_from_object_dtype(self): + # GH#23493 + + # arbitrary TimedeltaIndex; this should work for any TimedeltaIndex + # with non-None freq + tdi = pd.timedelta_range('3 Days', freq='ms', periods=9) + + # Fails because np.array(tdi, dtype=object) incorrectly returns Longs + result = TimedeltaArray(np.array(tdi, dtype=object), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + # Fails because `pd.Index(tdi, dtype=object) raises incorrectly + result = TimedeltaArray(pd.Index(tdi, dtype=object), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + # 
NB: for now we re-wrap in TimedeltaIndex to use assert_index_equal + # once assert_timedelta_array_equal is in place, this will be changed + def test_init_only_freq_infer(self): + # GH#23493 + # just pass data and freq='infer' if relevant; no other kwargs + + # arbitrary TimedeltaIndex; this should work for any TimedeltaIndex + # with non-None freq + tdi = pd.timedelta_range('3 Days', freq='H', periods=9) + expected = TimedeltaArray(tdi) + assert expected.freq == tdi.freq + + assert (tdi == expected).all() + assert (expected == tdi).all() + + result = TimedeltaArray(expected) + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + result = TimedeltaArray(list(tdi), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + result = TimedeltaArray(tuple(tdi), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + result = TimedeltaArray(np.array(tdi), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + result = TimedeltaArray(np.array(tdi).astype('m8[s]'), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + result = TimedeltaArray(pd.Series(tdi), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) + + result = TimedeltaArray(pd.Series(tdi, dtype=object), freq='infer') + tm.assert_equal(pd.TimedeltaIndex(result), tdi) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d6e4824575468..ac9a87b258056 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -292,7 +292,7 @@ class _FrequencyInferer(object): def __init__(self, index, warn=True): self.index = index - self.values = np.asarray(index).view('i8') + self.values = index.asi8 # This moves the values, which are implicitly in UTC, to the # the timezone so they are in local time From 510ae3de48540b2c16b2e7c09babb385b74b490f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 20:43:58 -0800 Subject: [PATCH 16/18] update tests for changed exception type --- pandas/core/indexes/datetimes.py 
| 5 +++++ pandas/core/indexes/timedeltas.py | 2 +- pandas/tests/indexes/datetimes/test_construction.py | 4 +++- pandas/tests/indexes/timedeltas/test_construction.py | 4 +++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7755c5e90d8aa..aea2f4e2bc8c4 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1101,6 +1101,9 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): else: raise + # ----------------------------------------------------------------- + # Wrapping DatetimeArray + year = wrap_field_accessor(DatetimeArrayMixin.year) month = wrap_field_accessor(DatetimeArrayMixin.month) day = wrap_field_accessor(DatetimeArrayMixin.day) @@ -1139,6 +1142,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): month_name = wrap_array_method(DatetimeArrayMixin.month_name, True) day_name = wrap_array_method(DatetimeArrayMixin.day_name, True) + # ----------------------------------------------------------------- + @Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) def searchsorted(self, value, side='left', sorter=None): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 32420dfbeeae3..e9288904c5d2e 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -156,7 +156,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, data = to_timedelta(data, unit=unit, box=False) if is_scalar(data): - raise dtl.scalar_data_error(data, cls) + raise TypeError(dtl.scalar_data_error(data, cls)) # convert if not already if getattr(data, 'dtype', None) != _TD_DTYPE: diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 7a251a8ecfb28..3cecd024cb85e 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ 
b/pandas/tests/indexes/datetimes/test_construction.py @@ -320,7 +320,9 @@ def test_constructor_coverage(self): pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', end='1/10/2000') - pytest.raises(ValueError, DatetimeIndex, '1/1/2000') + with pytest.raises(TypeError): + # GH#23493 + DatetimeIndex('1/1/2000') # generator expression gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index a5cfad98b31c1..84a4800a9b61f 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -63,7 +63,9 @@ def test_constructor_coverage(self): pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') - pytest.raises(ValueError, TimedeltaIndex, '1 days') + with pytest.raises(TypeError): + # GH#23493 + TimedeltaIndex('1 days') # generator expression gen = (timedelta(i) for i in range(10)) From 49cf4952ab17b367543721a5f871704d1a4ddbab Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 20:45:38 -0800 Subject: [PATCH 17/18] remove redundant dtype check --- pandas/core/indexes/datetimes.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index aea2f4e2bc8c4..d1faa916ca2d5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -309,11 +309,6 @@ def __new__(cls, data=None, assert subarr.dtype == 'M8[ns]', subarr.dtype subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) - if dtype is not None: - if not is_dtype_equal(subarr.dtype, dtype): - # dtype must be coerced to DatetimeTZDtype above - if subarr.tz is not None: - raise ValueError("cannot localize from non-UTC data") if verify_integrity and len(subarr) > 0: if freq is not None and not freq_infer: From 3a6263321d7740e6f8039a04a690e686a934fde7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: 
Sun, 4 Nov 2018 20:54:04 -0800 Subject: [PATCH 18/18] implement maybe_validate_freq --- pandas/core/arrays/datetimelike.py | 20 ++++++++++++++++++++ pandas/core/indexes/datetimes.py | 7 ++----- pandas/core/indexes/timedeltas.py | 6 +----- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d1bd0f65fea90..cece1ea28203e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -928,6 +928,26 @@ def maybe_define_freq(freq_infer, result): result.freq = frequencies.to_offset(inferred) +def maybe_validate_freq(result, verify, freq, freq_infer, **kwargs): + """ + If a frequency was passed by the user and not inferred or extracted + from the underlying data, then validate that the data is consistent with + the user-provided frequency. + + Parameters + ---------- + result : DatetimeIndex or TimedeltaIndex + verify : bool + freq : DateOffset or None + freq_infer : bool + **kwargs : arguments to pass to `_validate_frequency` + For DatetimeIndex this is just "ambiguous", empty for TimedeltaIndex + """ + if verify and len(result) > 0: + if freq is not None and not freq_infer: + result._validate_frequency(result, freq, **kwargs) + + def validate_tz_from_dtype(dtype, tz): """ If the given dtype is a DatetimeTZDtype, extract the implied diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d1faa916ca2d5..b4e38fd9f6bdf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -309,11 +309,8 @@ def __new__(cls, data=None, assert subarr.dtype == 'M8[ns]', subarr.dtype subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) - - if verify_integrity and len(subarr) > 0: - if freq is not None and not freq_infer: - cls._validate_frequency(subarr, freq, ambiguous=ambiguous) - + dtl.maybe_validate_freq(subarr, verify_integrity, freq, freq_infer, + ambiguous=ambiguous) dtl.maybe_define_freq(freq_infer, 
subarr) return subarr._deepcopy_if_needed(ref_to_data, copy) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index e9288904c5d2e..f4f844eb5c7f1 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -174,11 +174,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, assert data.dtype == 'm8[ns]', data.dtype subarr = cls._simple_new(data, name=name, freq=freq) - # check that we are matching freqs - if verify_integrity and len(subarr) > 0: - if freq is not None and not freq_infer: - cls._validate_frequency(subarr, freq) - + dtl.maybe_validate_freq(subarr, verify_integrity, freq, freq_infer) dtl.maybe_define_freq(freq_infer, subarr) return subarr