diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 2341187c40a9ed..45630f8109932d 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -1503,6 +1503,86 @@ def maybe_convert_dtype(data, copy):
     return data, copy
 
 
+def objects_to_datetime64ns(data, dayfirst, yearfirst,
+                            utc=False, errors="raise",
+                            require_iso8601=False, allow_object=False):
+    """
+    Convert data to array of timestamps.
+
+    Parameters
+    ----------
+    data : np.ndarray[object]
+    dayfirst : bool
+    yearfirst : bool
+    utc : bool, default False
+        Whether to convert timezone-aware timestamps to UTC
+    errors : {'raise', 'ignore', 'coerce'}, default 'raise'
+    require_iso8601 : bool, default False
+        Passed through unchanged to ``tslib.array_to_datetime``.
+    allow_object : bool, default False
+        Whether to return an object-dtype ndarray instead of raising if the
+        data contains more than one timezone.
+
+    Returns
+    -------
+    result : ndarray
+        np.int64 dtype if returned values represent UTC timestamps
+        np.datetime64[ns] if returned values represent wall times
+        object if mixed timezones
+    inferred_tz : tzinfo or None
+
+    Raises
+    ------
+    ValueError : if data cannot be converted to datetimes
+    TypeError : if the result is object-dtype and ``allow_object`` is False
+    """
+    assert errors in ["raise", "ignore", "coerce"]
+
+    # if str-dtype, convert to an object-dtype ndarray
+    data = np.array(data, copy=False, dtype=np.object_)
+
+    try:
+        result, tz_parsed = tslib.array_to_datetime(
+            data,
+            errors=errors,
+            utc=utc,
+            dayfirst=dayfirst,
+            yearfirst=yearfirst,
+            require_iso8601=require_iso8601
+        )
+    except ValueError as e:
+        try:
+            values, tz_parsed = conversion.datetime_to_datetime64(data)
+            # If tzaware, these values represent unix timestamps, so we
+            # return them as i8 to distinguish from wall times
+            return values.view('i8'), tz_parsed
+        except (ValueError, TypeError):
+            raise e
+
+    if tz_parsed is not None:
+        # We can take a shortcut since the datetime64 numpy array
+        # is in UTC
+        # Return i8 values to denote unix timestamps
+        return result.view('i8'), tz_parsed
+    elif is_datetime64_dtype(result):
+        # returning M8[ns] denotes wall-times; since tz is None
+        # the distinction is a thin one
+        return result, tz_parsed
+    elif is_object_dtype(result):
+        # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
+        # array is allowed. When called via `pd.DatetimeIndex`, we can
+        # only accept datetime64 dtype, so raise TypeError if object-dtype
+        # is returned, as that indicates the values can be recognized as
+        # datetimes but they have conflicting timezones/awareness
+        if allow_object:
+            return result, tz_parsed
+        raise TypeError(result)
+    else:  # pragma: no cover
+        # GH#23675 this TypeError should never be hit, whereas the TypeError
+        # in the object-dtype branch above is reachable.
+        raise TypeError(result)
+
+
 def _generate_regular_range(cls, start, end, periods, freq):
     """
     Generate a range of dates with the spans between dates described by
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 8f36096d128c29..b778b2132cd96e 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -17,8 +17,8 @@
 from pandas.core.dtypes.common import (
     _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype,
     is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, is_float,
-    is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar,
-    is_string_like, pandas_dtype)
+    is_integer, is_list_like, is_object_dtype, is_period_dtype, is_scalar,
+    is_string_dtype, is_string_like, pandas_dtype)
 import pandas.core.dtypes.concat as _concat
 from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.missing import isna
@@ -26,7 +26,7 @@
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays.datetimes import (
     DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype,
-    maybe_infer_tz)
+    maybe_infer_tz, objects_to_datetime64ns)
 from pandas.core.base import _shared_docs
 import pandas.core.common as com
 from pandas.core.indexes.base import Index, _index_shared_docs
@@ -281,10 +281,19 @@ def __new__(cls, data=None,
 
         # By this point we are assured to have either a numpy array or Index
         data, copy = maybe_convert_dtype(data, copy)
-        if not (is_datetime64_dtype(data) or is_datetime64tz_dtype(data) or
-                is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'):
-            data = tools.to_datetime(data, dayfirst=dayfirst,
-                                     yearfirst=yearfirst)
+
+        if is_object_dtype(data) or is_string_dtype(data):
+            # TODO: We do not have tests specific to string-dtypes,
+            # also complex or categorical or other extension
+            copy = False
+            if lib.infer_dtype(data) == 'integer':
+                data = data.astype(np.int64)
+            else:
+                # data comes back here as either i8 to denote UTC timestamps
+                # or M8[ns] to denote wall times
+                data, inferred_tz = objects_to_datetime64ns(
+                    data, dayfirst=dayfirst, yearfirst=yearfirst)
+                tz = maybe_infer_tz(tz, inferred_tz)
 
         if is_datetime64tz_dtype(data):
             tz = maybe_infer_tz(tz, data.tz)
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 1266b57c098cd9..7a87e33c7f97e3 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -171,7 +171,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
         - ndarray of Timestamps if box=False
     """
     from pandas import DatetimeIndex
-    from pandas.core.arrays.datetimes import maybe_convert_dtype
+    from pandas.core.arrays.datetimes import (
+        maybe_convert_dtype, objects_to_datetime64ns)
 
     if isinstance(arg, (list, tuple)):
         arg = np.array(arg, dtype='O')
@@ -233,8 +234,9 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
 
     tz_parsed = None
     result = None
-    try:
-        if format is not None:
+
+    if format is not None:
+        try:
             # shortcut formatting here
             if format == '%Y%m%d':
                 try:
@@ -266,24 +268,22 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
                         if errors == 'raise':
                             raise
                         result = arg
-
-        if result is None:
-            assert format is None or infer_datetime_format
-            result, tz_parsed = tslib.array_to_datetime(
-                arg,
-                errors=errors,
-                utc=tz == 'utc',
-                dayfirst=dayfirst,
-                yearfirst=yearfirst,
-                require_iso8601=require_iso8601
-            )
-    except ValueError as e:
-        # Fallback to try to convert datetime objects
-        try:
-            values, tz = conversion.datetime_to_datetime64(arg)
-            return DatetimeIndex._simple_new(values, name=name, tz=tz)
-        except (ValueError, TypeError):
-            raise e
+        except ValueError as e:
+            # Fallback to try to convert datetime objects if timezone-aware
+            # datetime objects are found without passing `utc=True`
+            try:
+                values, tz = conversion.datetime_to_datetime64(arg)
+                return DatetimeIndex._simple_new(values, name=name, tz=tz)
+            except (ValueError, TypeError):
+                raise e
+
+    if result is None:
+        assert format is None or infer_datetime_format
+        utc = tz == 'utc'
+        result, tz_parsed = objects_to_datetime64ns(
+            arg, dayfirst=dayfirst, yearfirst=yearfirst,
+            utc=utc, errors=errors, require_iso8601=require_iso8601,
+            allow_object=True)
 
     if tz_parsed is not None:
         if box: