From c31e5410c88444f1789ee6a3c83424f40a0ba11a Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Wed, 12 Apr 2017 13:39:10 -0400
Subject: [PATCH] DEPR: Deprecate generic timestamp dtypes

We only use the nanosecond frequency, and numpy
doesn't even handle generic timestamp dtypes well.

xref gh-15524 (comment).
---
 doc/source/whatsnew/v0.20.0.txt          |  1 +
 pandas/tests/series/test_constructors.py | 24 ++++++++++++++++++
 pandas/tests/series/test_dtypes.py       | 32 ++++++++++++++++++++++++
 pandas/types/cast.py                     | 25 ++++++++++++++++--
 4 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index a105a6801fb61..cb3e20e50380b 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1204,6 +1204,7 @@ Deprecations
 - ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`)
 - ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`)
 - ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`)
+- The ``Series`` constructor and ``.astype`` method have deprecated accepting timestamp dtypes without a frequency (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15524`)
 - ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`)
 - ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`)
 - ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index dbe2db67359f3..c429be2680f18 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -839,3 +839,27 @@ def test_constructor_cast_object(self):
         s = Series(date_range('1/1/2000', periods=10), dtype=object)
         exp = Series(date_range('1/1/2000', periods=10))
         tm.assert_series_equal(s, exp)
+
+    def test_constructor_generic_timestamp(self):
+        # see gh-15524
+        dtype = np.timedelta64
+        s = Series([], dtype=dtype)
+
+        assert s.empty
+        assert s.dtype == 'm8[ns]'
+
+        dtype = np.datetime64
+        s = Series([], dtype=dtype)
+
+        assert s.empty
+        assert s.dtype == 'M8[ns]'
+
+        # These timestamps have the wrong frequencies,
+        # so an Exception should be raised now.
+        msg = "cannot convert timedeltalike"
+        with tm.assertRaisesRegexp(TypeError, msg):
+            Series([], dtype='m8[ps]')
+
+        msg = "cannot convert datetimelike"
+        with tm.assertRaisesRegexp(TypeError, msg):
+            Series([], dtype='M8[ps]')
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index a2aaff25516ae..1127c3381f57d 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -153,6 +153,38 @@ def test_astype_dict(self):
         self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str})
         self.assertRaises(KeyError, s.astype, {0: str})
 
+    def test_astype_generic_timestamp(self):
+        # see gh-15524
+        data = [1]
+
+        s = Series(data)
+        dtype = np.datetime64
+        result = s.astype(dtype)
+        expected = Series(data, dtype=dtype)
+        assert_series_equal(result, expected)
+
+        s = Series(data)
+        dtype = np.timedelta64
+        result = s.astype(dtype)
+        expected = Series(data, dtype=dtype)
+        assert_series_equal(result, expected)
+
+    def test_astype_empty_constructor_equality(self):
+        # see gh-15524
+
+        for dtype in np.typecodes['All']:
+            if dtype not in ('S', 'V'):  # poor support (if any) currently
+                init_empty = Series([], dtype=dtype)
+                astype_empty = Series([]).astype(dtype)
+
+                try:
+                    assert_series_equal(init_empty, astype_empty)
+                except AssertionError as e:
+                    name = np.dtype(dtype).name
+                    msg = "{dtype} failed: ".format(dtype=name) + str(e)
+
+                    raise AssertionError(msg)
+
     def test_complexx(self):
         # GH4819
         # complex access for ndarray compat
diff --git a/pandas/types/cast.py b/pandas/types/cast.py
index 85053dba0c18b..bc3cc79604ade 100644
--- a/pandas/types/cast.py
+++ b/pandas/types/cast.py
@@ -1,7 +1,10 @@
 """ routings for casting """
 
 from datetime import datetime, timedelta
+
 import numpy as np
+import warnings
+
 from pandas._libs import tslib, lib
 from pandas._libs.tslib import iNaT
 from pandas.compat import string_types, text_type, PY3
@@ -620,6 +623,14 @@ def astype_nansafe(arr, dtype, copy=True):
         # work around NumPy brokenness, #1987
         return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
 
+    if dtype.name in ("datetime64", "timedelta64"):
+        msg = ("Passing in '{dtype}' dtype with no frequency is "
+               "deprecated and will raise in a future version. "
+               "Please pass in '{dtype}[ns]' instead.")
+        warnings.warn(msg.format(dtype=dtype.name),
+                      FutureWarning, stacklevel=2)
+        dtype = np.dtype(dtype.name + "[ns]")
+
     if copy:
         return arr.astype(dtype)
     return arr.view(dtype)
@@ -871,8 +882,15 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
         if is_datetime64 or is_datetime64tz or is_timedelta64:
 
             # force the dtype if needed
+            msg = ("Passing in '{dtype}' dtype with no frequency is "
+                   "deprecated and will raise in a future version. "
+                   "Please pass in '{dtype}[ns]' instead.")
+
             if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE):
-                if dtype.name == 'datetime64[ns]':
+                if dtype.name in ('datetime64', 'datetime64[ns]'):
+                    if dtype.name == 'datetime64':
+                        warnings.warn(msg.format(dtype=dtype.name),
+                                      FutureWarning, stacklevel=2)
                     dtype = _NS_DTYPE
                 else:
                     raise TypeError("cannot convert datetimelike to "
@@ -886,7 +904,10 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
                     value = [value]
 
             elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):
-                if dtype.name == 'timedelta64[ns]':
+                if dtype.name in ('timedelta64', 'timedelta64[ns]'):
+                    if dtype.name == 'timedelta64':
+                        warnings.warn(msg.format(dtype=dtype.name),
+                                      FutureWarning, stacklevel=2)
                     dtype = _TD_DTYPE
                 else:
                     raise TypeError("cannot convert timedeltalike to "