Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Deprecate generic timestamp dtypes #15987

Merged
merged 2 commits into from
Apr 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,7 @@ Deprecations
- ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`)
- ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`)
- ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`)
- The ``Series`` constructor and ``.astype`` method have deprecated accepting timestamp dtypes without a frequency (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15524`)
- ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`)
- ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`)
- ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`)
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,3 +839,30 @@ def test_constructor_cast_object(self):
s = Series(date_range('1/1/2000', periods=10), dtype=object)
exp = Series(date_range('1/1/2000', periods=10))
tm.assert_series_equal(s, exp)

def test_constructor_generic_timestamp_deprecated(self):
    """Check generic timestamp dtypes warn and non-ns units raise.

    See gh-15524.
    """
    # Passing the unit-less numpy timestamp types as ``dtype`` should
    # emit a FutureWarning and give back an empty nanosecond Series.
    generic_cases = [(np.timedelta64, 'm8[ns]'),
                     (np.datetime64, 'M8[ns]')]

    for generic_dtype, resolved in generic_cases:
        with tm.assert_produces_warning(FutureWarning):
            result = Series([], dtype=generic_dtype)

            assert result.empty
            assert result.dtype == resolved

    # These timestamps have the wrong frequencies,
    # so an Exception should be raised now.
    rejected_cases = [("cannot convert timedeltalike", 'm8[ps]'),
                      ("cannot convert datetimelike", 'M8[ps]')]

    for pattern, bad_dtype in rejected_cases:
        with tm.assertRaisesRegexp(TypeError, pattern):
            Series([], dtype=bad_dtype)
238 changes: 144 additions & 94 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612

import sys
import pytest

from datetime import datetime

import sys
import string
import warnings

from numpy import nan
import numpy as np
Expand All @@ -12,152 +16,199 @@

from pandas.compat import lrange, range, u
from pandas import compat
from pandas.util.testing import assert_series_equal
import pandas.util.testing as tm

from .common import TestData


class TestSeriesDtypes(TestData, tm.TestCase):
class TestSeriesDtypes(TestData):

def test_astype(self):
@pytest.mark.parametrize("dtype", ["float32", "float64",
"int64", "int32"])
def test_astype(self, dtype):
s = Series(np.random.randn(5), name='foo')
as_typed = s.astype(dtype)

for dtype in ['float32', 'float64', 'int64', 'int32']:
astyped = s.astype(dtype)
self.assertEqual(astyped.dtype, dtype)
self.assertEqual(astyped.name, s.name)
assert as_typed.dtype == dtype
assert as_typed.name == s.name

def test_dtype(self):

self.assertEqual(self.ts.dtype, np.dtype('float64'))
self.assertEqual(self.ts.dtypes, np.dtype('float64'))
self.assertEqual(self.ts.ftype, 'float64:dense')
self.assertEqual(self.ts.ftypes, 'float64:dense')
assert_series_equal(self.ts.get_dtype_counts(), Series(1, ['float64']))
assert_series_equal(self.ts.get_ftype_counts(), Series(
1, ['float64:dense']))

def test_astype_cast_nan_inf_int(self):
# GH14265, check nan and inf raise error when converting to int
types = [np.int32, np.int64]
values = [np.nan, np.inf]
assert self.ts.dtype == np.dtype('float64')
assert self.ts.dtypes == np.dtype('float64')
assert self.ts.ftype == 'float64:dense'
assert self.ts.ftypes == 'float64:dense'
tm.assert_series_equal(self.ts.get_dtype_counts(),
Series(1, ['float64']))
tm.assert_series_equal(self.ts.get_ftype_counts(),
Series(1, ['float64:dense']))

@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
# gh-14265: check NaN and inf raise error when converting to int
msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'
s = Series([value])

for this_type in types:
for this_val in values:
s = Series([this_val])
with self.assertRaisesRegexp(ValueError, msg):
s.astype(this_type)
with tm.assertRaisesRegexp(ValueError, msg):
s.astype(dtype)

def test_astype_cast_object_int(self):
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
arr = Series(["car", "house", "tree", "1"])
with pytest.raises(ValueError):
arr.astype(dtype)

self.assertRaises(ValueError, arr.astype, int)
self.assertRaises(ValueError, arr.astype, np.int64)
self.assertRaises(ValueError, arr.astype, np.int8)

def test_astype_cast_object_int(self):
arr = Series(['1', '2', '3', '4'], dtype=object)
result = arr.astype(int)
self.assert_series_equal(result, Series(np.arange(1, 5)))

tm.assert_series_equal(result, Series(np.arange(1, 5)))

def test_astype_datetimes(self):
import pandas._libs.tslib as tslib

s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))

s = s.astype('O')
self.assertEqual(s.dtype, np.object_)
assert s.dtype == np.object_

s = Series([datetime(2001, 1, 2, 0, 0)])

s = s.astype('O')
self.assertEqual(s.dtype, np.object_)
assert s.dtype == np.object_

s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])

s[1] = np.nan
self.assertEqual(s.dtype, 'M8[ns]')
s = s.astype('O')
self.assertEqual(s.dtype, np.object_)
assert s.dtype == 'M8[ns]'

def test_astype_str(self):
# GH4405
digits = string.digits
s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)])
s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0])
types = (compat.text_type, np.str_)
for typ in types:
for s in (s1, s2):
res = s.astype(typ)
expec = s.map(compat.text_type)
assert_series_equal(res, expec)

# GH9757
# Test str and unicode on python 2.x and just str on python 3.x
for tt in set([str, compat.text_type]):
ts = Series([Timestamp('2010-01-04 00:00:00')])
s = ts.astype(tt)
expected = Series([tt('2010-01-04')])
assert_series_equal(s, expected)

ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')])
s = ts.astype(tt)
expected = Series([tt('2010-01-04 00:00:00-05:00')])
assert_series_equal(s, expected)

td = Series([Timedelta(1, unit='d')])
s = td.astype(tt)
expected = Series([tt('1 days 00:00:00.000000000')])
assert_series_equal(s, expected)
s = s.astype('O')
assert s.dtype == np.object_

@pytest.mark.parametrize("dtype", [compat.text_type, np.str_])
@pytest.mark.parametrize("series", [
    Series([string.digits * 10, tm.rands(63),
            tm.rands(64), tm.rands(1000)]),
    Series([string.digits * 10, tm.rands(63),
            tm.rands(64), nan, 1.0]),
])
def test_astype_str_map(self, dtype, series):
    """Check astype to a string dtype matches mapping ``text_type``.

    See gh-4405.
    """
    tm.assert_series_equal(series.astype(dtype),
                           series.map(compat.text_type))

@pytest.mark.parametrize("dtype", [str, compat.text_type])
def test_astype_str_cast(self, dtype):
    """Check datetime-like values cast to their expected string form.

    See gh-9757: this tests str and unicode on python 2.x
    and just str on python 3.x.
    """
    # Each pair is (value placed in a one-element Series, expected text).
    cases = [
        (Timestamp('2010-01-04 00:00:00'),
         '2010-01-04'),
        (Timestamp('2010-01-04 00:00:00', tz='US/Eastern'),
         '2010-01-04 00:00:00-05:00'),
        (Timedelta(1, unit='d'),
         '1 days 00:00:00.000000000'),
    ]

    for value, text in cases:
        result = Series([value]).astype(dtype)
        expected = Series([dtype(text)])
        tm.assert_series_equal(result, expected)

def test_astype_unicode(self):

# GH7758
# a bit of magic is required to set default encoding encoding to utf-8
# see gh-7758: A bit of magic is required to set
# default encoding to utf-8
digits = string.digits
test_series = [
Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
Series([u('データーサイエンス、お前はもう死んでいる')]),

]

former_encoding = None

if not compat.PY3:
# in python we can force the default encoding for this test
# In Python, we can force the default encoding for this test
former_encoding = sys.getdefaultencoding()
reload(sys) # noqa

sys.setdefaultencoding("utf-8")
if sys.getdefaultencoding() == "utf-8":
test_series.append(Series([u('野菜食べないとやばい')
.encode("utf-8")]))

for s in test_series:
res = s.astype("unicode")
expec = s.map(compat.text_type)
assert_series_equal(res, expec)
# restore the former encoding
tm.assert_series_equal(res, expec)

# Restore the former encoding
if former_encoding is not None and former_encoding != "utf-8":
reload(sys) # noqa
sys.setdefaultencoding(former_encoding)

def test_astype_dict(self):
# GH7271
# see gh-7271
s = Series(range(0, 10, 2), name='abc')

result = s.astype({'abc': str})
expected = Series(['0', '2', '4', '6', '8'], name='abc')
assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)

result = s.astype({'abc': 'float64'})
expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64',
name='abc')
assert_series_equal(result, expected)

self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str})
self.assertRaises(KeyError, s.astype, {0: str})

def test_complexx(self):
# GH4819
# complex access for ndarray compat
tm.assert_series_equal(result, expected)

with pytest.raises(KeyError):
s.astype({'abc': str, 'def': str})

with pytest.raises(KeyError):
s.astype({0: str})

def test_astype_generic_timestamp_deprecated(self):
# see gh-15524
data = [1]

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you try to remove the check_stacklevel here? (btw, the easiest way to find out what it should be is to set warnings to raise an error, and then you can count the number of stack frames in the traceback)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that's fair. Done.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't look like you removed this?

s = Series(data)
dtype = np.datetime64
result = s.astype(dtype)
expected = Series(data, dtype=dtype)
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
s = Series(data)
dtype = np.timedelta64
result = s.astype(dtype)
expected = Series(data, dtype=dtype)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("dtype", np.typecodes['All'])
def test_astype_empty_constructor_equality(self, dtype):
    """Check an empty Series built with ``dtype`` equals one cast to it.

    See gh-15524.
    """
    # The 'S' and 'V' typecodes have poor support (if any) currently.
    if dtype in ('S', 'V'):
        return

    # Generic timestamp dtypes ('M' and 'm') are deprecated,
    # but we test that already in series/test_constructors.py,
    # so warnings are only recorded here, never checked.
    with warnings.catch_warnings(record=True):
        constructed = Series([], dtype=dtype)
        converted = Series([]).astype(dtype)
        tm.assert_series_equal(constructed, converted)

def test_complex(self):
# see gh-4819: complex access for ndarray compat
a = np.arange(5, dtype=np.float64)
b = Series(a + 4j * a)

tm.assert_numpy_array_equal(a, b.real)
tm.assert_numpy_array_equal(4 * a, b.imag)

Expand All @@ -166,23 +217,22 @@ def test_complexx(self):
tm.assert_numpy_array_equal(4 * a, b.imag)

def test_arg_for_errors_in_astype(self):
# issue #14878

sr = Series([1, 2, 3])
# see gh-14878
s = Series([1, 2, 3])

with self.assertRaises(ValueError):
sr.astype(np.float64, errors=False)
with pytest.raises(ValueError):
s.astype(np.float64, errors=False)

with tm.assert_produces_warning(FutureWarning):
sr.astype(np.int8, raise_on_error=True)
s.astype(np.int8, raise_on_error=True)

sr.astype(np.int8, errors='raise')
s.astype(np.int8, errors='raise')

def test_intercept_astype_object(self):
series = Series(date_range('1/1/2000', periods=10))

# this test no longer makes sense as series is by default already
# M8[ns]
# This test no longer makes sense, as
# Series is by default already M8[ns].
expected = series.astype('object')

df = DataFrame({'a': series,
Expand All @@ -192,9 +242,9 @@ def test_intercept_astype_object(self):
tm.assert_series_equal(df.dtypes, exp_dtypes)

result = df.values.squeeze()
self.assertTrue((result[:, 0] == expected.values).all())
assert (result[:, 0] == expected.values).all()

df = DataFrame({'a': series, 'b': ['foo'] * len(series)})

result = df.values.squeeze()
self.assertTrue((result[:, 0] == expected.values).all())
assert (result[:, 0] == expected.values).all()
Loading