From d581e3e6437fd1b8e8abc8f46a665bc2b8b85a8b Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 12 Apr 2017 13:39:10 -0400 Subject: [PATCH] ENH: Handle generic timestamp dtypes with Series We only use the nanosecond frequency, so generic timestamp frequencies should be interpreted with the nanosecond frequency. xref gh-15524 (comment). --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/series/test_constructors.py | 24 ++++++++++++++++++ pandas/tests/series/test_dtypes.py | 32 ++++++++++++++++++++++++ pandas/types/cast.py | 12 +++++++-- 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fd1cd3d0022c96..faac02888af006 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -320,6 +320,7 @@ Other Enhancements ^^^^^^^^^^^^^^^^^^ - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here <io.feather>`. +- The ``Series`` constructor will now accept timestamp dtypes that do not specify frequency like ``np.datetime64`` (:issue:`15524`) - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index dbe2db67359f3f..c429be2680f18d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -839,3 +839,27 @@ def test_constructor_cast_object(self): s = Series(date_range('1/1/2000', periods=10), dtype=object) exp = Series(date_range('1/1/2000', periods=10)) tm.assert_series_equal(s, exp) + + def test_constructor_generic_timestamp(self): + # see gh-15524 + dtype = np.timedelta64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'm8[ns]' + + dtype = np.datetime64 + s = Series([], 
dtype=dtype) + + assert s.empty + assert s.dtype == 'M8[ns]' + + # These timestamps have the wrong frequencies, + # so an Exception should be raised now. + msg = "cannot convert timedeltalike" + with tm.assertRaisesRegexp(TypeError, msg): + Series([], dtype='m8[ps]') + + msg = "cannot convert datetimelike" + with tm.assertRaisesRegexp(TypeError, msg): + Series([], dtype='M8[ps]') diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index a2aaff25516ae0..1127c3381f57db 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -153,6 +153,38 @@ def test_astype_dict(self): self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str}) self.assertRaises(KeyError, s.astype, {0: str}) + def test_astype_generic_timestamp(self): + # see gh-15524 + data = [1] + + s = Series(data) + dtype = np.datetime64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + assert_series_equal(result, expected) + + s = Series(data) + dtype = np.timedelta64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + assert_series_equal(result, expected) + + def test_astype_empty_constructor_equality(self): + # see gh-15524 + + for dtype in np.typecodes['All']: + if dtype not in ('S', 'V'): # poor support (if any) currently + init_empty = Series([], dtype=dtype) + astype_empty = Series([]).astype(dtype) + + try: + assert_series_equal(init_empty, astype_empty) + except AssertionError as e: + name = np.dtype(dtype).name + msg = "{dtype} failed: ".format(dtype=name) + str(e) + + raise AssertionError(msg) + def test_complexx(self): # GH4819 # complex access for ndarray compat diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 580ce12de33335..1209d6dc63895c 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -603,6 +603,14 @@ def astype_nansafe(arr, dtype, copy=True): # work around NumPy brokenness, #1987 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + # NumPy arrays 
don't handle generic timestamp dtypes well. Since + # we only use the nanosecond frequency, interpret generic timestamp + # dtypes as nanosecond frequency. + if dtype.name == "datetime64": + dtype = np.dtype('M8[ns]') + elif dtype.name == "timedelta64": + dtype = np.dtype('m8[ns]') + if copy: return arr.astype(dtype) return arr.view(dtype) @@ -855,7 +863,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): # force the dtype if needed if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.name == 'datetime64[ns]': + if dtype.name in ('datetime64', 'datetime64[ns]'): dtype = _NS_DTYPE else: raise TypeError("cannot convert datetimelike to " @@ -869,7 +877,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.name == 'timedelta64[ns]': + if dtype.name in ('timedelta64', 'timedelta64[ns]'): dtype = _TD_DTYPE else: raise TypeError("cannot convert timedeltalike to "