diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 1127c3381f57db..d9de72af55c3d5 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + import sys from datetime import datetime import string @@ -12,146 +14,160 @@ from pandas.compat import lrange, range, u from pandas import compat -from pandas.util.testing import assert_series_equal import pandas.util.testing as tm from .common import TestData -class TestSeriesDtypes(TestData, tm.TestCase): +class TestSeriesDtypes(TestData): - def test_astype(self): + @pytest.mark.parametrize("dtype", ["float32", "float64", + "int64", "int32"]) + def test_astype(self, dtype): s = Series(np.random.randn(5), name='foo') + as_typed = s.astype(dtype) - for dtype in ['float32', 'float64', 'int64', 'int32']: - astyped = s.astype(dtype) - self.assertEqual(astyped.dtype, dtype) - self.assertEqual(astyped.name, s.name) + assert as_typed.dtype == dtype + assert as_typed.name == s.name def test_dtype(self): - self.assertEqual(self.ts.dtype, np.dtype('float64')) - self.assertEqual(self.ts.dtypes, np.dtype('float64')) - self.assertEqual(self.ts.ftype, 'float64:dense') - self.assertEqual(self.ts.ftypes, 'float64:dense') - assert_series_equal(self.ts.get_dtype_counts(), Series(1, ['float64'])) - assert_series_equal(self.ts.get_ftype_counts(), Series( - 1, ['float64:dense'])) - - def test_astype_cast_nan_inf_int(self): - # GH14265, check nan and inf raise error when converting to int - types = [np.int32, np.int64] - values = [np.nan, np.inf] + assert self.ts.dtype == np.dtype('float64') + assert self.ts.dtypes == np.dtype('float64') + assert self.ts.ftype == 'float64:dense' + assert self.ts.ftypes == 'float64:dense' + tm.assert_series_equal(self.ts.get_dtype_counts(), + Series(1, ['float64'])) + tm.assert_series_equal(self.ts.get_ftype_counts(), + Series(1, ['float64:dense'])) + + @pytest.mark.parametrize("value", [np.nan, np.inf]) + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + def test_astype_cast_nan_inf_int(self, dtype, value): + # gh-14265: check NaN and inf raise error when converting to int msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' + s = Series([value]) - for this_type in types: - for this_val in values: - s = Series([this_val]) - with self.assertRaisesRegexp(ValueError, msg): - s.astype(this_type) + with tm.assertRaisesRegexp(ValueError, msg): + s.astype(dtype) - def test_astype_cast_object_int(self): + @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) + def test_astype_cast_object_int_fail(self, dtype): arr = Series(["car", "house", "tree", "1"]) + with pytest.raises(ValueError): + arr.astype(dtype) - self.assertRaises(ValueError, arr.astype, int) - self.assertRaises(ValueError, arr.astype, np.int64) - self.assertRaises(ValueError, arr.astype, np.int8) - + def test_astype_cast_object_int(self): arr = Series(['1', '2', '3', '4'], dtype=object) result = arr.astype(int) - self.assert_series_equal(result, Series(np.arange(1, 5))) + + tm.assert_series_equal(result, Series(np.arange(1, 5))) def test_astype_datetimes(self): import pandas._libs.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0)]) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)]) + s[1] = np.nan - self.assertEqual(s.dtype, 'M8[ns]') - s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == 'M8[ns]' - def test_astype_str(self): - # GH4405 - digits = string.digits - s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]) - s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0]) - types = (compat.text_type, np.str_) - for typ in types: - for s in (s1, s2): - res = s.astype(typ) - expec = s.map(compat.text_type) - assert_series_equal(res, expec) - - # GH9757 - # Test str and unicode on python 2.x and just str on python 3.x - for tt in set([str, compat.text_type]): - ts = Series([Timestamp('2010-01-04 00:00:00')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04')]) - assert_series_equal(s, expected) - - ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04 00:00:00-05:00')]) - assert_series_equal(s, expected) - - td = Series([Timedelta(1, unit='d')]) - s = td.astype(tt) - expected = Series([tt('1 days 00:00:00.000000000')]) - assert_series_equal(s, expected) + s = s.astype('O') + assert s.dtype == np.object_ + + @pytest.mark.parametrize("dtype", [compat.text_type, np.str_]) + @pytest.mark.parametrize("series", [Series([string.digits * 10, + tm.rands(63), + tm.rands(64), + tm.rands(1000)]), + Series([string.digits * 10, + tm.rands(63), + tm.rands(64), nan, 1.0])]) + def test_astype_str_map(self, dtype, series): + # see gh-4405 + result = series.astype(dtype) + expected = series.map(compat.text_type) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [str, compat.text_type]) + def test_astype_str_cast(self, dtype): + # see gh-9757: test str and unicode on python 2.x + # and just str on python 3.x + ts = Series([Timestamp('2010-01-04 00:00:00')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04')]) + tm.assert_series_equal(s, expected) + + ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04 00:00:00-05:00')]) + tm.assert_series_equal(s, expected) + + td = Series([Timedelta(1, unit='d')]) + s = td.astype(dtype) + + expected = Series([dtype('1 days 00:00:00.000000000')]) + tm.assert_series_equal(s, expected) def test_astype_unicode(self): - - # GH7758 - # a bit of magic is required to set default encoding encoding to utf-8 + # see gh-7758: A bit of magic is required to set + # default encoding to utf-8 digits = string.digits test_series = [ Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), Series([u('データーサイエンス、お前はもう死んでいる')]), - ] former_encoding = None + if not compat.PY3: - # in python we can force the default encoding for this test + # In Python, we can force the default encoding for this test former_encoding = sys.getdefaultencoding() reload(sys) # noqa + sys.setdefaultencoding("utf-8") if sys.getdefaultencoding() == "utf-8": test_series.append(Series([u('野菜食べないとやばい') .encode("utf-8")])) + for s in test_series: res = s.astype("unicode") expec = s.map(compat.text_type) - assert_series_equal(res, expec) - # restore the former encoding + tm.assert_series_equal(res, expec) + + # Restore the former encoding if former_encoding is not None and former_encoding != "utf-8": reload(sys) # noqa sys.setdefaultencoding(former_encoding) def test_astype_dict(self): - # GH7271 + # see gh-7271 s = Series(range(0, 10, 2), name='abc') result = s.astype({'abc': str}) expected = Series(['0', '2', '4', '6', '8'], name='abc') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s.astype({'abc': 'float64'}) expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64', name='abc') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + with pytest.raises(KeyError): + s.astype({'abc': str, 'def': str}) - self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str}) - self.assertRaises(KeyError, s.astype, {0: str}) + with pytest.raises(KeyError): + s.astype({0: str}) def test_astype_generic_timestamp(self): # see gh-15524 @@ -161,35 +177,35 @@ def test_astype_generic_timestamp(self): dtype = np.datetime64 result = s.astype(dtype) expected = Series(data, dtype=dtype) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = Series(data) dtype = np.timedelta64 result = s.astype(dtype) expected = Series(data, dtype=dtype) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) - def test_astype_empty_constructor_equality(self): + @pytest.mark.parametrize("dtype", np.typecodes['All']) + def test_astype_empty_constructor_equality(self, dtype): # see gh-15524 - for dtype in np.typecodes['All']: - if dtype not in ('S', 'V'): # poor support (if any) currently - init_empty = Series([], dtype=dtype) - astype_empty = Series([]).astype(dtype) + if dtype not in ('S', 'V'): # poor support (if any) currently + init_empty = Series([], dtype=dtype) + astype_empty = Series([]).astype(dtype) - try: - assert_series_equal(init_empty, astype_empty) - except AssertionError as e: - name = np.dtype(dtype).name - msg = "{dtype} failed: ".format(dtype=name) + str(e) + try: + tm.assert_series_equal(init_empty, astype_empty) + except AssertionError as e: + name = np.dtype(dtype).name + msg = "{dtype} failed: ".format(dtype=name) + str(e) - raise AssertionError(msg) + raise AssertionError(msg) - def test_complexx(self): - # GH4819 - # complex access for ndarray compat + def test_complex(self): + # see gh-4819: complex access for ndarray compat a = np.arange(5, dtype=np.float64) b = Series(a + 4j * a) + tm.assert_numpy_array_equal(a, b.real) tm.assert_numpy_array_equal(4 * a, b.imag) @@ -198,23 +214,22 @@ def test_complexx(self): tm.assert_numpy_array_equal(4 * a, b.imag) def test_arg_for_errors_in_astype(self): - # issue #14878 - - sr = Series([1, 2, 3]) + # see gh-14878 + s = Series([1, 2, 3]) - with self.assertRaises(ValueError): - sr.astype(np.float64, errors=False) + with pytest.raises(ValueError): + s.astype(np.float64, errors=False) with tm.assert_produces_warning(FutureWarning): - sr.astype(np.int8, raise_on_error=True) + s.astype(np.int8, raise_on_error=True) - sr.astype(np.int8, errors='raise') + s.astype(np.int8, errors='raise') def test_intercept_astype_object(self): series = Series(date_range('1/1/2000', periods=10)) - # this test no longer makes sense as series is by default already - # M8[ns] + # This test no longer makes sense, as + # Series is by default already M8[ns]. expected = series.astype('object') df = DataFrame({'a': series, @@ -224,9 +239,9 @@ def test_intercept_astype_object(self): tm.assert_series_equal(df.dtypes, exp_dtypes) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() df = DataFrame({'a': series, 'b': ['foo'] * len(series)}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all()