Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF/API: DatetimeTZDtype #23990

Merged
merged 20 commits into from
Dec 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,7 @@ Deprecations
- :func:`pandas.types.is_period` is deprecated in favor of `pandas.types.is_period_dtype` (:issue:`23917`)
- :func:`pandas.types.is_datetimetz` is deprecated in favor of `pandas.types.is_datetime64tz_dtype` (:issue:`23917`)
- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`)
- Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Expand Down
25 changes: 15 additions & 10 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,16 +1174,21 @@ def validate_tz_from_dtype(dtype, tz):
ValueError : on tzinfo mismatch
"""
if dtype is not None:
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and not timezones.tz_compare(tz, dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
except TypeError:
pass
if isinstance(dtype, compat.string_types):
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
except TypeError:
# Things like `datetime64[ns]`, which is OK for the
# constructors, but also nonsense, which should be validated
# but not by us. We *do* allow non-existent tz errors to
# go through
pass
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and not timezones.tz_compare(tz, dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
return tz


Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
resolution as libresolution, timezones)
import pandas.compat as compat
from pandas.errors import PerformanceWarning
from pandas.util._decorators import Appender, cache_readonly
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import (
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
Expand Down Expand Up @@ -333,7 +333,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
def _box_func(self):
return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)

@cache_readonly
@property
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this not cached?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very strange behavior I couldn't figure out.

In [13]: idx = pd.DatetimeIndex(['2000']).tz_localize("UTC")

In [14]: result = idx.tz_convert("US/Central")

In [15]: result
Out[15]: DatetimeIndex(['1999-12-31 18:00:00-06:00'], dtype='datetime64[ns, UTC]', freq=None)

In [16]: result.tz
Out[16]: <DstTzInfo 'US/Central' LMT-1 day, 18:09:00 STD>

I really don't know why, but that only occurred when the tz was cached.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, the tz itself can change depending on what date is being localized.

def dtype(self):
if self.tz is None:
return _NS_DTYPE
Expand Down
32 changes: 0 additions & 32 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,38 +1767,6 @@ def is_complex_dtype(arr_or_dtype):
return issubclass(tipo, np.complexfloating)


def _coerce_to_dtype(dtype):
    """
    Convert a dtype-like object into a pandas or NumPy dtype instance.

    The pandas dtypes are tried first, in this order: categorical,
    tz-aware datetime, period, interval. Anything that matches none of
    them is handed to ``np.dtype``.

    Parameters
    ----------
    dtype : The dtype that we want to coerce.

    Returns
    -------
    pd_or_np_dtype : The coerced dtype.
    """
    if is_categorical_dtype(dtype):
        # Rebuild from the input's own attributes; inputs that lack them
        # fall back to ``categories=None`` / ``ordered=False``.
        return CategoricalDtype(
            categories=getattr(dtype, 'categories', None),
            ordered=getattr(dtype, 'ordered', False))

    if is_datetime64tz_dtype(dtype):
        return DatetimeTZDtype(dtype)

    if is_period_dtype(dtype):
        return PeriodDtype(dtype)

    if is_interval_dtype(dtype):
        return IntervalDtype(dtype)

    # Not a pandas dtype: let NumPy interpret it.
    return np.dtype(dtype)


def _get_dtype(arr_or_dtype):
"""
Get the dtype instance associated with an array
Expand Down
133 changes: 81 additions & 52 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
""" define extension dtypes """

import re
import warnings

import numpy as np
import pytz

from pandas._libs.interval import Interval
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones
Expand Down Expand Up @@ -491,64 +492,69 @@ class DatetimeTZDtype(PandasExtensionDtype):
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache = {}

def __new__(cls, unit=None, tz=None):
def __init__(self, unit="ns", tz=None):
"""
Create a new unit if needed, otherwise return from the cache
An ExtensionDtype for timezone-aware datetime data.
Parameters
----------
unit : string unit that this represents, currently must be 'ns'
tz : string tz that this represents
"""

if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

elif unit is None:
# we are called as an empty constructor
# generally for pickle compat
return object.__new__(cls)
unit : str, default "ns"
The precision of the datetime data. Currently limited
to ``"ns"``.
tz : str, int, or datetime.tzinfo
TomAugspurger marked this conversation as resolved.
Show resolved Hide resolved
The timezone.
elif tz is None:
Raises
------
pytz.UnknownTimeZoneError
When the requested timezone cannot be found.
# we were passed a string that we can construct
try:
m = cls._match.search(unit)
if m is not None:
unit = m.groupdict()['unit']
tz = timezones.maybe_get_tz(m.groupdict()['tz'])
except TypeError:
raise ValueError("could not construct DatetimeTZDtype")
Examples
--------
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC')
datetime64[ns, UTC]
elif isinstance(unit, compat.string_types):
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central')
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
"""
if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

if unit != 'ns':
if unit != 'ns':
if isinstance(unit, compat.string_types) and tz is None:
# maybe a string like datetime64[ns, tz], which we support for
# now.
result = type(self).construct_from_string(unit)
unit = result.unit
tz = result.tz
msg = (
"Passing a dtype alias like 'datetime64[ns, {tz}]' "
"to DatetimeTZDtype is deprecated. Use "
"'DatetimeTZDtype.construct_from_string()' instead."
)
warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2)
else:
raise ValueError("DatetimeTZDtype only supports ns units")

unit = unit
tz = tz
if tz:
tz = timezones.maybe_get_tz(tz)
elif tz is not None:
raise pytz.UnknownTimeZoneError(tz)
elif tz is None:
raise TypeError("A 'tz' is required.")

if tz is None:
raise ValueError("DatetimeTZDtype constructor must have a tz "
"supplied")
self._unit = unit
self._tz = tz

# hash with the actual tz if we can
# some cannot be hashed, so stringfy
try:
key = (unit, tz)
hash(key)
except TypeError:
key = (unit, str(tz))
@property
def unit(self):
    """
    The precision of the datetime data.

    Exposes the value held in the private ``_unit`` attribute.
    """
    return self._unit

# set/retrieve from cache
try:
return cls._cache[key]
except KeyError:
u = object.__new__(cls)
u.unit = unit
u.tz = tz
cls._cache[key] = u
return u
@property
def tz(self):
    """
    The timezone.

    Exposes the value held in the private ``_tz`` attribute.
    """
    return self._tz

@classmethod
def construct_array_type(cls):
Expand All @@ -565,24 +571,42 @@ def construct_array_type(cls):
@classmethod
def construct_from_string(cls, string):
"""
attempt to construct this type from a string, raise a TypeError if
it's not possible
Construct a DatetimeTZDtype from a string.
Parameters
----------
string : str
The string alias for this DatetimeTZDtype.
Should be formatted like ``datetime64[ns, <tz>]``,
where ``<tz>`` is the timezone name.
Examples
--------
>>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]')
datetime64[ns, UTC]
"""
msg = "Could not construct DatetimeTZDtype from '{}'"
try:
return cls(unit=string)
except ValueError:
raise TypeError("could not construct DatetimeTZDtype")
match = cls._match.match(string)
if match:
d = match.groupdict()
return cls(unit=d['unit'], tz=d['tz'])
except Exception:
# TODO(py3): Change this pass to `raise TypeError(msg) from e`
pass
raise TypeError(msg.format(string))

def __unicode__(self):
    """Render the dtype as ``datetime64[<unit>, <tz>]``."""
    # ``tz`` is interpolated through ``format``, i.e. via its string form.
    fields = {'unit': self.unit, 'tz': self.tz}
    return "datetime64[{unit}, {tz}]".format(**fields)

@property
def name(self):
    """
    A string representation of the dtype.

    This is simply ``str(self)``.
    """
    text = str(self)
    return text

def __hash__(self):
    """Hash the dtype through its string form so instances are hashable."""
    # TODO: update this.
    text = str(self)
    return hash(text)

def __eq__(self, other):
Expand All @@ -593,6 +617,11 @@ def __eq__(self, other):
self.unit == other.unit and
str(self.tz) == str(other.tz))

def __setstate__(self, state):
    """
    Restore the private attributes from a pickled state mapping.

    Parameters
    ----------
    state : dict
        Must provide the keys ``'tz'`` and ``'unit'``; a missing key
        raises ``KeyError``.
    """
    # for pickle compat: write the private attributes directly, ``_tz``
    # first and then ``_unit``.
    for key in ('tz', 'unit'):
        setattr(self, '_' + key, state[key])


class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def _isna_ndarraylike(obj):
vec = libmissing.isnaobj(values.ravel())
result[...] = vec.reshape(shape)

elif needs_i8_conversion(obj):
elif needs_i8_conversion(dtype):
# this is the NaT pattern
result = values.view('i8') == iNaT
else:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2669,11 +2669,10 @@ def _astype(self, dtype, **kwargs):
these automatically copy, so copy=True has no effect
raise on an except if raise == True
"""
dtype = pandas_dtype(dtype)

# if we are passed a datetime64[ns, tz]
if is_datetime64tz_dtype(dtype):
dtype = DatetimeTZDtype(dtype)

values = self.values
if getattr(values, 'tz', None) is None:
values = DatetimeIndex(values).tz_localize('UTC')
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def test_numpy_string_dtype(self):
'datetime64[ns, Asia/Tokyo]',
'datetime64[ns, UTC]'])
def test_datetimetz_dtype(self, dtype):
assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype)
assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype)
assert (com.pandas_dtype(dtype) ==
DatetimeTZDtype.construct_from_string(dtype))
assert com.pandas_dtype(dtype) == dtype

def test_categorical_dtype(self):
Expand Down
Loading