From e11cc2a522f34871d9b5147f716bdc191cc3823e Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 9 Apr 2016 14:06:40 +0900 Subject: [PATCH] TST: Add numeric coercion tests --- pandas/tests/indexing/test_coercion.py | 540 +++++++++++++++++++++++++ pandas/tests/series/test_analytics.py | 209 ---------- pandas/tests/series/test_replace.py | 225 +++++++++++ pandas/tests/test_common.py | 102 ++++- 4 files changed, 866 insertions(+), 210 deletions(-) create mode 100644 pandas/tests/indexing/test_coercion.py create mode 100644 pandas/tests/series/test_replace.py diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py new file mode 100644 index 0000000000000..3585feacda8c2 --- /dev/null +++ b/pandas/tests/indexing/test_coercion.py @@ -0,0 +1,540 @@ +# -*- coding: utf-8 -*- + +import nose +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.compat as compat + + +############################################################### +# Index / Series common tests which may trigger dtype coercions +############################################################### + + +class TestIndexCoercion(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_setitem_index_numeric_coercion_int(self): + # tests setitem with non-existing numeric key + s = pd.Series([1, 2, 3, 4]) + self.assertEqual(s.index.dtype, np.int64) + + # int + int -> int + temp = s.copy() + temp[5] = 5 + tm.assert_series_equal(temp, pd.Series([1, 2, 3, 4, 5], + index=[0, 1, 2, 3, 5])) + self.assertEqual(temp.index.dtype, np.int64) + + # int + float -> float + temp = s.copy() + temp[1.1] = 5 + tm.assert_series_equal(temp, pd.Series([1, 2, 3, 4, 5], + index=[0, 1, 2, 3, 1.1])) + self.assertEqual(temp.index.dtype, np.float64) + + def test_setitem_index_numeric_coercion_float(self): + # tests setitem with non-existing numeric key + s = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1]) + self.assertEqual(s.index.dtype, np.float64) + + # float + int 
-> float + temp = s.copy() + # TODO_GH12747 The result must be float + with tm.assertRaises(IndexError): + temp[5] = 5 + + # float + float -> float + temp = s.copy() + temp[5.1] = 5 + exp = pd.Series([1, 2, 3, 4, 5], index=[1.1, 2.1, 3.1, 4.1, 5.1]) + tm.assert_series_equal(temp, exp) + self.assertEqual(temp.index.dtype, np.float64) + + def test_insert_numeric_coercion_int(self): + idx = pd.Int64Index([1, 2, 3, 4]) + self.assertEqual(idx.dtype, np.int64) + + # int + int -> int + res = idx.insert(1, 1) + tm.assert_index_equal(res, pd.Index([1, 1, 2, 3, 4])) + self.assertEqual(res.dtype, np.int64) + + # int + float -> float + res = idx.insert(1, 1.1) + tm.assert_index_equal(res, pd.Index([1, 1.1, 2, 3, 4])) + self.assertEqual(res.dtype, np.float64) + + # int + bool -> int + res = idx.insert(1, False) + tm.assert_index_equal(res, pd.Index([1, 0, 2, 3, 4])) + self.assertEqual(res.dtype, np.int64) + + def test_insert_numeric_coercion_float(self): + idx = pd.Float64Index([1, 2, 3, 4]) + self.assertEqual(idx.dtype, np.float64) + + # float + int -> float + res = idx.insert(1, 1) + tm.assert_index_equal(res, pd.Index([1., 1., 2., 3., 4.])) + self.assertEqual(res.dtype, np.float64) + + # float + float -> float + res = idx.insert(1, 1.1) + tm.assert_index_equal(res, pd.Index([1., 1.1, 2., 3., 4.])) + self.assertEqual(res.dtype, np.float64) + + # float + bool -> float + res = idx.insert(1, False) + tm.assert_index_equal(res, pd.Index([1., 0., 2., 3., 4.])) + self.assertEqual(res.dtype, np.float64) + + +class TestSeriesCoercion(tm.TestCase): + + _multiprocess_can_split_ = True + + def setUp(self): + self.rep = {} + self.rep['object'] = ['a', 'b'] + self.rep['int64'] = [4, 5] + self.rep['float64'] = [1.1, 2.2] + self.rep['complex128'] = [1 + 1j, 2 + 2j] + self.rep['bool'] = [True, False] + + def test_setitem_numeric_coercion_int(self): + s = pd.Series([1, 2, 3, 4]) + self.assertEqual(s.dtype, np.int64) + + # int + int -> int + temp = s.copy() + temp[1] = 1 + 
tm.assert_series_equal(temp, pd.Series([1, 1, 3, 4])) + self.assertEqual(temp.dtype, np.int64) + + # int + float -> float + # TODO_GH12747 The result must be float + temp = s.copy() + temp[1] = 1.1 + # tm.assert_series_equal(temp, pd.Series([1, 1.1, 3, 4])) + # self.assertEqual(temp.dtype, np.float64) + tm.assert_series_equal(temp, pd.Series([1, 1, 3, 4])) + self.assertEqual(temp.dtype, np.int64) + + # int + complex -> complex + temp = s.copy() + temp[1] = 1 + 1j + tm.assert_series_equal(temp, pd.Series([1, 1 + 1j, 3, 4])) + self.assertEqual(temp.dtype, np.complex128) + + # int + bool -> int + temp = s.copy() + temp[1] = True + tm.assert_series_equal(temp, pd.Series([1, 1, 3, 4])) + self.assertEqual(temp.dtype, np.int64) + + def test_setitem_numeric_coercion_float(self): + s = pd.Series([1.1, 2.2, 3.3, 4.4]) + self.assertEqual(s.dtype, np.float64) + + # float + int -> float + temp = s.copy() + temp[1] = 1 + tm.assert_series_equal(temp, pd.Series([1.1, 1.0, 3.3, 4.4])) + self.assertEqual(temp.dtype, np.float64) + + # float + float -> float + temp = s.copy() + temp[1] = 1.1 + tm.assert_series_equal(temp, pd.Series([1.1, 1.1, 3.3, 4.4])) + self.assertEqual(temp.dtype, np.float64) + + # float + complex -> complex + temp = s.copy() + temp[1] = 1 + 1j + tm.assert_series_equal(temp, pd.Series([1.1, 1 + 1j, 3.3, 4.4])) + self.assertEqual(temp.dtype, np.complex128) + + # float + bool -> float + temp = s.copy() + temp[1] = True + tm.assert_series_equal(temp, pd.Series([1.1, 1.0, 3.3, 4.4])) + self.assertEqual(temp.dtype, np.float64) + + def test_setitem_numeric_coercion_complex(self): + s = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) + self.assertEqual(s.dtype, np.complex128) + + # complex + int -> complex + temp = s.copy() + temp[1] = 1 + tm.assert_series_equal(temp, pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j])) + self.assertEqual(temp.dtype, np.complex128) + + # complex + float -> complex + temp = s.copy() + temp[1] = 1.1 + tm.assert_series_equal(temp, pd.Series([1 + 1j, 1.1, 3 
+ 3j, 4 + 4j])) + self.assertEqual(temp.dtype, np.complex128) + + # complex + complex -> complex + temp = s.copy() + temp[1] = 1 + 1j + tm.assert_series_equal(temp, + pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j])) + self.assertEqual(temp.dtype, np.complex128) + + # complex + bool -> complex + temp = s.copy() + temp[1] = True + tm.assert_series_equal(temp, pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j])) + self.assertEqual(temp.dtype, np.complex128) + + def test_setitem_numeric_coercion_bool(self): + s = pd.Series([True, False, True, False]) + self.assertEqual(s.dtype, np.bool) + + # bool + int -> int + # TODO_GH12747 The result must be int + temp = s.copy() + temp[1] = 1 + # tm.assert_series_equal(temp, pd.Series([1, 1, 1, 0])) + # self.assertEqual(temp.dtype, np.int64) + tm.assert_series_equal(temp, pd.Series([True, True, True, False])) + self.assertEqual(temp.dtype, np.bool) + + # TODO_GH12747 The result must be int + temp = s.copy() + temp[1] = 3 # greater than bool + # tm.assert_series_equal(temp, pd.Series([1, 3, 1, 0])) + # self.assertEqual(temp.dtype, np.int64) + tm.assert_series_equal(temp, pd.Series([True, True, True, False])) + self.assertEqual(temp.dtype, np.bool) + + # bool + float -> float + # TODO_GH12747 The result must be float + temp = s.copy() + temp[1] = 1.1 + # tm.assert_series_equal(temp, pd.Series([1., 1.1, 1., 0.])) + # self.assertEqual(temp.dtype, np.float64) + tm.assert_series_equal(temp, pd.Series([True, True, True, False])) + self.assertEqual(temp.dtype, np.bool) + + # bool + complex -> complex (buggy, results in bool) + # TODO_GH12747 The result must be complex + temp = s.copy() + temp[1] = 1 + 1j + # tm.assert_series_equal(temp, pd.Series([1, 1 + 1j, 1, 0])) + # self.assertEqual(temp.dtype, np.complex128) + tm.assert_series_equal(temp, pd.Series([True, True, True, False])) + self.assertEqual(temp.dtype, np.bool) + + # bool + bool -> bool + temp = s.copy() + temp[1] = True + tm.assert_series_equal(temp, pd.Series([True, True, True, False])) + 
self.assertEqual(temp.dtype, np.bool) + + def test_where_numeric_coercion_int(self): + s = pd.Series([1, 2, 3, 4]) + self.assertEqual(s.dtype, np.int64) + cond = pd.Series([True, False, True, False]) + + # int + int -> int + res = s.where(cond, 1) + tm.assert_series_equal(res, pd.Series([1, 1, 3, 1])) + self.assertEqual(res.dtype, np.int64) + res = s.where(cond, pd.Series([5, 6, 7, 8])) + tm.assert_series_equal(res, pd.Series([1, 6, 3, 8])) + self.assertEqual(res.dtype, np.int64) + + # int + float -> float + res = s.where(cond, 1.1) + tm.assert_series_equal(res, pd.Series([1, 1.1, 3, 1.1])) + self.assertEqual(res.dtype, np.float64) + res = s.where(cond, pd.Series([5.5, 6.6, 7.7, 8.8])) + tm.assert_series_equal(res, pd.Series([1, 6.6, 3, 8.8])) + self.assertEqual(res.dtype, np.float64) + + # int + complex -> complex + res = s.where(cond, 1 + 1j) + tm.assert_series_equal(res, pd.Series([1, 1 + 1j, 3, 1 + 1j])) + self.assertEqual(res.dtype, np.complex128) + res = s.where(cond, pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j])) + tm.assert_series_equal(res, pd.Series([1, 6 + 6j, 3, 8 + 8j])) + self.assertEqual(res.dtype, np.complex128) + + # int + bool -> int + res = s.where(cond, True) + tm.assert_series_equal(res, pd.Series([1, 1, 3, 1])) + self.assertEqual(res.dtype, np.int64) + res = s.where(cond, pd.Series([True, False, True, True])) + tm.assert_series_equal(res, pd.Series([1, 0, 3, 1])) + self.assertEqual(res.dtype, np.int64) + + def test_where_numeric_coercion_float(self): + s = pd.Series([1.1, 2.2, 3.3, 4.4]) + self.assertEqual(s.dtype, np.float64) + cond = pd.Series([True, False, True, False]) + + # float + int -> float + res = s.where(cond, 1) + tm.assert_series_equal(res, pd.Series([1.1, 1.0, 3.3, 1.0])) + self.assertEqual(res.dtype, np.float64) + res = s.where(cond, pd.Series([5, 6, 7, 8])) + tm.assert_series_equal(res, pd.Series([1.1, 6.0, 3.3, 8.0])) + self.assertEqual(res.dtype, np.float64) + + # float + float -> float + res = s.where(cond, 1.1) + 
tm.assert_series_equal(res, pd.Series([1.1, 1.1, 3.3, 1.1])) + self.assertEqual(res.dtype, np.float64) + res = s.where(cond, pd.Series([5.5, 6.6, 7.7, 8.8])) + tm.assert_series_equal(res, pd.Series([1.1, 6.6, 3.3, 8.8])) + self.assertEqual(res.dtype, np.float64) + + # float + complex -> complex + res = s.where(cond, 1 + 1j) + tm.assert_series_equal(res, pd.Series([1.1, 1 + 1j, 3.3, 1 + 1j])) + self.assertEqual(res.dtype, np.complex128) + res = s.where(cond, pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j])) + tm.assert_series_equal(res, pd.Series([1.1, 6 + 6j, 3.3, 8 + 8j])) + self.assertEqual(res.dtype, np.complex128) + + # float + bool -> float + res = s.where(cond, True) + tm.assert_series_equal(res, pd.Series([1.1, 1.0, 3.3, 1.0])) + self.assertEqual(res.dtype, np.float64) + res = s.where(cond, pd.Series([True, False, True, True])) + tm.assert_series_equal(res, pd.Series([1.1, 0.0, 3.3, 1.0])) + self.assertEqual(res.dtype, np.float64) + + def test_where_numeric_coercion_complex(self): + s = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) + self.assertEqual(s.dtype, np.complex128) + cond = pd.Series([True, False, True, False]) + + # complex + int -> complex + res = s.where(cond, 1) + tm.assert_series_equal(res, pd.Series([1 + 1j, 1, 3 + 3j, 1])) + self.assertEqual(res.dtype, np.complex128) + res = s.where(cond, pd.Series([5, 6, 7, 8])) + tm.assert_series_equal(res, pd.Series([1 + 1j, 6.0, 3 + 3j, 8.0])) + self.assertEqual(res.dtype, np.complex128) + + # complex + float -> complex + res = s.where(cond, 1.1) + tm.assert_series_equal(res, pd.Series([1 + 1j, 1.1, 3 + 3j, 1.1])) + self.assertEqual(res.dtype, np.complex128) + res = s.where(cond, pd.Series([5.5, 6.6, 7.7, 8.8])) + tm.assert_series_equal(res, pd.Series([1 + 1j, 6.6, 3 + 3j, 8.8])) + self.assertEqual(res.dtype, np.complex128) + + # complex + complex -> complex + res = s.where(cond, 1 + 1j) + tm.assert_series_equal(res, + pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 1 + 1j])) + self.assertEqual(res.dtype, np.complex128) + res = 
s.where(cond, pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j])) + tm.assert_series_equal(res, + pd.Series([1 + 1j, 6 + 6j, 3 + 3j, 8 + 8j])) + self.assertEqual(res.dtype, np.complex128) + + # complex + bool -> complex + res = s.where(cond, True) + tm.assert_series_equal(res, pd.Series([1 + 1j, 1, 3 + 3j, 1])) + self.assertEqual(res.dtype, np.complex128) + res = s.where(cond, pd.Series([True, False, True, True])) + tm.assert_series_equal(res, pd.Series([1 + 1j, 0, 3 + 3j, 1])) + self.assertEqual(res.dtype, np.complex128) + + def test_where_numeric_coercion_bool(self): + s = pd.Series([True, False, True, False]) + self.assertEqual(s.dtype, np.bool) + cond = pd.Series([True, False, True, False]) + + # bool + int -> int + res = s.where(cond, 1) + tm.assert_series_equal(res, pd.Series([1, 1, 1, 1])) + self.assertEqual(res.dtype, np.int64) + res = s.where(cond, pd.Series([5, 6, 7, 8])) + tm.assert_series_equal(res, pd.Series([1, 6, 1, 8])) + self.assertEqual(res.dtype, np.int64) + + # bool + float -> float + res = s.where(cond, 1.1) + tm.assert_series_equal(res, pd.Series([1.0, 1.1, 1.0, 1.1])) + self.assertEqual(res.dtype, np.float64) + res = s.where(cond, pd.Series([5.5, 6.6, 7.7, 8.8])) + tm.assert_series_equal(res, pd.Series([1.0, 6.6, 1.0, 8.8])) + self.assertEqual(res.dtype, np.float64) + + # bool + complex -> complex + res = s.where(cond, 1 + 1j) + tm.assert_series_equal(res, pd.Series([1, 1 + 1j, 1, 1 + 1j])) + self.assertEqual(res.dtype, np.complex128) + res = s.where(cond, pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j])) + tm.assert_series_equal(res, pd.Series([1, 6 + 6j, 1, 8 + 8j])) + self.assertEqual(res.dtype, np.complex128) + + # bool + bool -> bool + res = s.where(cond, True) + tm.assert_series_equal(res, pd.Series([True, True, True, True])) + self.assertEqual(res.dtype, np.bool) + res = s.where(cond, pd.Series([True, False, True, True])) + tm.assert_series_equal(res, pd.Series([True, False, True, True])) + self.assertEqual(res.dtype, np.bool) + + # not indexing, 
but place here for consistency + + def test_fillna_numeric_coercion_int(self): + # int can't hold NaN + pass + + def test_fillna_numeric_coercion_float(self): + s = pd.Series([1.1, np.nan, 3.3, 4.4]) + self.assertEqual(s.dtype, np.float64) + + # float + int -> float + res = s.fillna(1) + tm.assert_series_equal(res, pd.Series([1.1, 1.0, 3.3, 4.4])) + self.assertEqual(res.dtype, np.float64) + + # float + float -> float + res = s.fillna(1.1) + tm.assert_series_equal(res, pd.Series([1.1, 1.1, 3.3, 4.4])) + self.assertEqual(res.dtype, np.float64) + + # float + complex -> complex + res = s.fillna(1 + 1j) + tm.assert_series_equal(res, pd.Series([1.1, 1 + 1j, 3.3, 4.4])) + self.assertEqual(res.dtype, np.complex128) + + # float + bool -> float + res = s.fillna(True) + tm.assert_series_equal(res, pd.Series([1.1, 1.0, 3.3, 4.4])) + self.assertEqual(res.dtype, np.float64) + + def test_fillna_numeric_coercion_complex(self): + s = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) + self.assertEqual(s.dtype, np.complex128) + + # complex + int -> complex + res = s.fillna(1) + tm.assert_series_equal(res, pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j])) + self.assertEqual(res.dtype, np.complex128) + + # complex + float -> complex + res = s.fillna(1.1) + tm.assert_series_equal(res, pd.Series([1 + 1j, 1.1, 3 + 3j, 4 + 4j])) + self.assertEqual(res.dtype, np.complex128) + + # complex + complex -> complex + res = s.fillna(1 + 1j) + tm.assert_series_equal(res, + pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j])) + self.assertEqual(res.dtype, np.complex128) + + # complex + bool -> complex + res = s.fillna(True) + tm.assert_series_equal(res, pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j])) + self.assertEqual(res.dtype, np.complex128) + + def test_fillna_numeric_coercion_bool(self): + # bool can't hold NaN + pass + + def _assert_replace_conversion(self, from_key, to_key, how): + index = pd.Index([3, 4], name='xxx') + s = pd.Series(self.rep[from_key], index=index, name='yyy') + self.assertEqual(s.dtype, from_key) + + if 
how == 'dict': + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == 'series': + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError + + result = s.replace(replacer) + + if ((from_key == 'float64' and + to_key in ('bool', 'int64')) or + + (from_key == 'complex128' and + to_key in ('bool', 'int64', 'float64')) or + + (from_key == 'int64' and + to_key in ('bool')) or + + # TODO_GH12747 The result must be int? + (from_key == 'bool' and to_key in ('int64'))): + + # Expected: do not downcast by replacement + exp = pd.Series(self.rep[to_key], index=index, + name='yyy', dtype=from_key) + + else: + exp = pd.Series(self.rep[to_key], index=index, name='yyy') + self.assertEqual(exp.dtype, to_key) + + tm.assert_series_equal(result, exp) + + def test_replace_conversion_dict_from_object(self): + from_key = 'object' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + def test_replace_conversion_dict_from_int(self): + from_key = 'int64' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + def test_replace_conversion_dict_from_float(self): + from_key = 'float64' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + def test_replace_conversion_dict_from_complex(self): + from_key = 'complex128' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + def test_replace_conversion_dict_from_bool(self): + from_key = 'bool' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + # Series + def test_replace_conversion_series_from_object(self): + from_key = 'object' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') + + def test_replace_conversion_series_from_int(self): + from_key = 'int64' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') + + def 
test_replace_conversion_series_from_float(self): + from_key = 'float64' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') + + def test_replace_conversion_series_from_complex(self): + from_key = 'complex128' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') + + def test_replace_conversion_series_from_bool(self): + from_key = 'bool' + for to_key in self.rep: + + if compat.PY3: + # doesn't work in PY3, though ...dict_from_bool works fine + raise nose.SkipTest("doesn't work as in PY3") + + self._assert_replace_conversion(from_key, to_key, how='series') diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 2edd8b752aeff..9182b16d1f5b5 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -17,7 +17,6 @@ from pandas.tseries.index import Timestamp from pandas.tseries.tdi import Timedelta import pandas.core.config as cf -import pandas.lib as lib import pandas.core.nanops as nanops @@ -1283,214 +1282,6 @@ def test_unique_data_ownership(self): # it works! #1807 Series(Series(["a", "c", "b"]).unique()).sort_values() - def test_replace(self): - N = 100 - ser = Series(np.random.randn(N)) - ser[0:4] = np.nan - ser[6:10] = 0 - - # replace list with a single value - ser.replace([np.nan], -1, inplace=True) - - exp = ser.fillna(-1) - assert_series_equal(ser, exp) - - rs = ser.replace(0., np.nan) - ser[ser == 0.] 
= np.nan - assert_series_equal(rs, ser) - - ser = Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), - dtype=object) - ser[:5] = np.nan - ser[6:10] = 'foo' - ser[20:30] = 'bar' - - # replace list with a single value - rs = ser.replace([np.nan, 'foo', 'bar'], -1) - - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -1).all()) - self.assertTrue((rs[20:30] == -1).all()) - self.assertTrue((isnull(ser[:5])).all()) - - # replace with different values - rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) - - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -2).all()) - self.assertTrue((rs[20:30] == -3).all()) - self.assertTrue((isnull(ser[:5])).all()) - - # replace with different values with 2 lists - rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) - assert_series_equal(rs, rs2) - - # replace inplace - ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) - - self.assertTrue((ser[:5] == -1).all()) - self.assertTrue((ser[6:10] == -1).all()) - self.assertTrue((ser[20:30] == -1).all()) - - ser = Series([np.nan, 0, np.inf]) - assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - - ser = Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT]) - assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - filled = ser.copy() - filled[4] = 0 - assert_series_equal(ser.replace(np.inf, 0), filled) - - ser = Series(self.ts.index) - assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - - # malformed - self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) - - # make sure that we aren't just masking a TypeError because bools don't - # implement indexing - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): - ser.replace([1, 2], [np.nan, 0]) - - ser = Series([0, 1, 2, 3, 4]) - result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) - assert_series_equal(result, Series([4, 3, 2, 1, 0])) - - # API change from 0.12? 
- # GH 5319 - ser = Series([0, np.nan, 2, 3, 4]) - expected = ser.ffill() - result = ser.replace([np.nan]) - assert_series_equal(result, expected) - - ser = Series([0, np.nan, 2, 3, 4]) - expected = ser.ffill() - result = ser.replace(np.nan) - assert_series_equal(result, expected) - # GH 5797 - ser = Series(date_range('20130101', periods=5)) - expected = ser.copy() - expected.loc[2] = Timestamp('20120101') - result = ser.replace({Timestamp('20130103'): Timestamp('20120101')}) - assert_series_equal(result, expected) - result = ser.replace(Timestamp('20130103'), Timestamp('20120101')) - assert_series_equal(result, expected) - - def test_replace_with_single_list(self): - ser = Series([0, 1, 2, 3, 4]) - result = ser.replace([1, 2, 3]) - assert_series_equal(result, Series([0, 0, 0, 0, 4])) - - s = ser.copy() - s.replace([1, 2, 3], inplace=True) - assert_series_equal(s, Series([0, 0, 0, 0, 4])) - - # make sure things don't get corrupted when fillna call fails - s = ser.copy() - with tm.assertRaises(ValueError): - s.replace([1, 2, 3], inplace=True, method='crash_cymbal') - assert_series_equal(s, ser) - - def test_replace_mixed_types(self): - s = Series(np.arange(5), dtype='int64') - - def check_replace(to_rep, val, expected): - sc = s.copy() - r = s.replace(to_rep, val) - sc.replace(to_rep, val, inplace=True) - assert_series_equal(expected, r) - assert_series_equal(expected, sc) - - # should NOT upcast to float - e = Series([0, 1, 2, 3, 4]) - tr, v = [3], [3.0] - check_replace(tr, v, e) - - # MUST upcast to float - e = Series([0, 1, 2, 3.5, 4]) - tr, v = [3], [3.5] - check_replace(tr, v, e) - - # casts to object - e = Series([0, 1, 2, 3.5, 'a']) - tr, v = [3, 4], [3.5, 'a'] - check_replace(tr, v, e) - - # again casts to object - e = Series([0, 1, 2, 3.5, Timestamp('20130101')]) - tr, v = [3, 4], [3.5, Timestamp('20130101')] - check_replace(tr, v, e) - - # casts to float - e = Series([0, 1, 2, 3.5, 1]) - tr, v = [3, 4], [3.5, True] - check_replace(tr, v, e) - - # test an 
object with dates + floats + integers + strings - dr = date_range('1/1/2001', '1/10/2001', - freq='D').to_series().reset_index(drop=True) - result = dr.astype(object).replace( - [dr[0], dr[1], dr[2]], [1.0, 2, 'a']) - expected = Series([1.0, 2, 'a'] + dr[3:].tolist(), dtype=object) - assert_series_equal(result, expected) - - def test_replace_bool_with_string_no_op(self): - s = Series([True, False, True]) - result = s.replace('fun', 'in-the-sun') - tm.assert_series_equal(s, result) - - def test_replace_bool_with_string(self): - # nonexistent elements - s = Series([True, False, True]) - result = s.replace(True, '2u') - expected = Series(['2u', False, '2u']) - tm.assert_series_equal(expected, result) - - def test_replace_bool_with_bool(self): - s = Series([True, False, True]) - result = s.replace(True, False) - expected = Series([False] * len(s)) - tm.assert_series_equal(expected, result) - - def test_replace_with_dict_with_bool_keys(self): - s = Series([True, False, True]) - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): - s.replace({'asdf': 'asdb', True: 'yes'}) - - def test_replace2(self): - N = 100 - ser = Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), - dtype=object) - ser[:5] = np.nan - ser[6:10] = 'foo' - ser[20:30] = 'bar' - - # replace list with a single value - rs = ser.replace([np.nan, 'foo', 'bar'], -1) - - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -1).all()) - self.assertTrue((rs[20:30] == -1).all()) - self.assertTrue((isnull(ser[:5])).all()) - - # replace with different values - rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) - - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -2).all()) - self.assertTrue((rs[20:30] == -3).all()) - self.assertTrue((isnull(ser[:5])).all()) - - # replace with different values with 2 lists - rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) - assert_series_equal(rs, rs2) - - # replace inplace - ser.replace([np.nan, 'foo', 'bar'], -1, 
inplace=True) - self.assertTrue((ser[:5] == -1).all()) - self.assertTrue((ser[6:10] == -1).all()) - self.assertTrue((ser[20:30] == -1).all()) - def test_repeat(self): s = Series(np.random.randn(3), index=['a', 'b', 'c']) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py new file mode 100644 index 0000000000000..d80328ea3863a --- /dev/null +++ b/pandas/tests/series/test_replace.py @@ -0,0 +1,225 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +import numpy as np +import pandas as pd +import pandas.lib as lib +import pandas.util.testing as tm + +from .common import TestData + + +class TestSeriesReplace(TestData, tm.TestCase): + + _multiprocess_can_split_ = True + + def test_replace(self): + N = 100 + ser = pd.Series(np.random.randn(N)) + ser[0:4] = np.nan + ser[6:10] = 0 + + # replace list with a single value + ser.replace([np.nan], -1, inplace=True) + + exp = ser.fillna(-1) + tm.assert_series_equal(ser, exp) + + rs = ser.replace(0., np.nan) + ser[ser == 0.] 
= np.nan + tm.assert_series_equal(rs, ser) + + ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), + dtype=object) + ser[:5] = np.nan + ser[6:10] = 'foo' + ser[20:30] = 'bar' + + # replace list with a single value + rs = ser.replace([np.nan, 'foo', 'bar'], -1) + + self.assertTrue((rs[:5] == -1).all()) + self.assertTrue((rs[6:10] == -1).all()) + self.assertTrue((rs[20:30] == -1).all()) + self.assertTrue((pd.isnull(ser[:5])).all()) + + # replace with different values + rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) + + self.assertTrue((rs[:5] == -1).all()) + self.assertTrue((rs[6:10] == -2).all()) + self.assertTrue((rs[20:30] == -3).all()) + self.assertTrue((pd.isnull(ser[:5])).all()) + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) + tm.assert_series_equal(rs, rs2) + + # replace inplace + ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) + + self.assertTrue((ser[:5] == -1).all()) + self.assertTrue((ser[6:10] == -1).all()) + self.assertTrue((ser[20:30] == -1).all()) + + ser = pd.Series([np.nan, 0, np.inf]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + filled = ser.copy() + filled[4] = 0 + tm.assert_series_equal(ser.replace(np.inf, 0), filled) + + ser = pd.Series(self.ts.index) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + # malformed + self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) + + # make sure that we aren't just masking a TypeError because bools don't + # implement indexing + with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): + ser.replace([1, 2], [np.nan, 0]) + + ser = pd.Series([0, 1, 2, 3, 4]) + result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) + tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0])) + + def test_replace_gh5319(self): + # API change from 0.12? 
+        # GH 5319: replace() with no value behaves like ffill()
+        ser = pd.Series([0, np.nan, 2, 3, 4])
+        expected = ser.ffill()
+        result = ser.replace([np.nan])
+        tm.assert_series_equal(result, expected)
+
+        ser = pd.Series([0, np.nan, 2, 3, 4])
+        expected = ser.ffill()
+        result = ser.replace(np.nan)
+        tm.assert_series_equal(result, expected)
+        # GH 5797: datetime64 values, via dict and via scalar pair
+        ser = pd.Series(pd.date_range('20130101', periods=5))
+        expected = ser.copy()
+        expected.loc[2] = pd.Timestamp('20120101')
+        result = ser.replace({pd.Timestamp('20130103'):
+                              pd.Timestamp('20120101')})
+        tm.assert_series_equal(result, expected)
+        result = ser.replace(pd.Timestamp('20130103'),
+                             pd.Timestamp('20120101'))
+        tm.assert_series_equal(result, expected)
+
+    def test_replace_with_single_list(self):
+        ser = pd.Series([0, 1, 2, 3, 4])
+        result = ser.replace([1, 2, 3])
+        tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))
+
+        s = ser.copy()
+        s.replace([1, 2, 3], inplace=True)
+        tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))
+
+        # make sure things don't get corrupted when fillna call fails
+        s = ser.copy()
+        with tm.assertRaises(ValueError):
+            s.replace([1, 2, 3], inplace=True, method='crash_cymbal')
+        tm.assert_series_equal(s, ser)
+
+    def test_replace_mixed_types(self):
+        s = pd.Series(np.arange(5), dtype='int64')
+
+        def check_replace(to_rep, val, expected):
+            sc = s.copy()
+            r = s.replace(to_rep, val)
+            sc.replace(to_rep, val, inplace=True)
+            tm.assert_series_equal(expected, r)
+            tm.assert_series_equal(expected, sc)
+
+        # should NOT upcast to float
+        e = pd.Series([0, 1, 2, 3, 4])
+        tr, v = [3], [3.0]
+        check_replace(tr, v, e)
+
+        # MUST upcast to float
+        e = pd.Series([0, 1, 2, 3.5, 4])
+        tr, v = [3], [3.5]
+        check_replace(tr, v, e)
+
+        # casts to object
+        e = pd.Series([0, 1, 2, 3.5, 'a'])
+        tr, v = [3, 4], [3.5, 'a']
+        check_replace(tr, v, e)
+
+        # again casts to object
+        e = pd.Series([0, 1, 2, 3.5, pd.Timestamp('20130101')])
+        tr, v = [3, 4], [3.5, pd.Timestamp('20130101')]
+        check_replace(tr, v, e)
+
+        # casts to float
+        e = pd.Series([0, 1, 2, 3.5, 1])
+        tr, v = [3, 4], [3.5, True]
+        check_replace(tr, v, e)
+
+        # test an object with dates + floats + integers + strings
+        dr = pd.date_range('1/1/2001', '1/10/2001',
+                           freq='D').to_series().reset_index(drop=True)
+        result = dr.astype(object).replace(
+            [dr[0], dr[1], dr[2]], [1.0, 2, 'a'])
+        expected = pd.Series([1.0, 2, 'a'] + dr[3:].tolist(), dtype=object)
+        tm.assert_series_equal(result, expected)
+
+    def test_replace_bool_with_string_no_op(self):
+        # nonexistent elements
+        s = pd.Series([True, False, True])
+        result = s.replace('fun', 'in-the-sun')
+        tm.assert_series_equal(s, result)
+
+    def test_replace_bool_with_string(self):
+        s = pd.Series([True, False, True])
+        result = s.replace(True, '2u')
+        expected = pd.Series(['2u', False, '2u'])
+        tm.assert_series_equal(expected, result)
+
+    def test_replace_bool_with_bool(self):
+        s = pd.Series([True, False, True])
+        result = s.replace(True, False)
+        expected = pd.Series([False] * len(s))
+        tm.assert_series_equal(expected, result)
+
+    def test_replace_with_dict_with_bool_keys(self):
+        s = pd.Series([True, False, True])
+        with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'):
+            s.replace({'asdf': 'asdb', True: 'yes'})
+
+    def test_replace2(self):
+        N = 100
+        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N),
+                        dtype=object)
+        ser[:5] = np.nan
+        ser[6:10] = 'foo'
+        ser[20:30] = 'bar'
+
+        # replace list with a single value
+        rs = ser.replace([np.nan, 'foo', 'bar'], -1)
+
+        self.assertTrue((rs[:5] == -1).all())
+        self.assertTrue((rs[6:10] == -1).all())
+        self.assertTrue((rs[20:30] == -1).all())
+        self.assertTrue((pd.isnull(ser[:5])).all())
+
+        # replace with different values
+        rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})
+
+        self.assertTrue((rs[:5] == -1).all())
+        self.assertTrue((rs[6:10] == -2).all())
+        self.assertTrue((rs[20:30] == -3).all())
+        self.assertTrue((pd.isnull(ser[:5])).all())
+
+        # replace with different values with 2 lists
+        rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
+        tm.assert_series_equal(rs, rs2)
+
+        # replace inplace
+        ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
+        self.assertTrue((ser[:5] == -1).all())
+        self.assertTrue((ser[6:10] == -1).all())
+        self.assertTrue((ser[20:30] == -1).all())
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 880145715ce62..c77d71be7c9c9 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import collections
-from datetime import datetime
+from datetime import datetime, timedelta
 import re
 
 import nose
@@ -251,6 +251,106 @@ def test_0d_array(self):
         self.assertFalse(isnull(np.array(0, dtype=object)))
 
 
+class TestNumberScalar(tm.TestCase):
+
+    def test_is_number(self):
+
+        self.assertTrue(com.is_number(True))
+        self.assertTrue(com.is_number(1))
+        self.assertTrue(com.is_number(1.1))
+        self.assertTrue(com.is_number(1 + 3j))
+        self.assertTrue(com.is_number(bool(False)))
+        self.assertTrue(com.is_number(np.int64(1)))
+        self.assertTrue(com.is_number(np.float64(1.1)))
+        self.assertTrue(com.is_number(np.complex128(1 + 3j)))
+        self.assertTrue(com.is_number(np.nan))
+
+        self.assertFalse(com.is_number(None))
+        self.assertFalse(com.is_number('x'))
+        self.assertFalse(com.is_number(datetime(2011, 1, 1)))
+        self.assertFalse(com.is_number(np.datetime64('2011-01-01')))
+        self.assertFalse(com.is_number(pd.Timestamp('2011-01-01')))
+        self.assertFalse(com.is_number(pd.Timestamp('2011-01-01',
+                                                    tz='US/Eastern')))
+        self.assertFalse(com.is_number(timedelta(1000)))
+        self.assertFalse(com.is_number(pd.Timedelta('1 days')))
+
+        # questionable: np.bool_ is not a number, np.timedelta64 is
+        self.assertFalse(com.is_number(np.bool_(False)))
+        self.assertTrue(com.is_number(np.timedelta64(1, 'D')))
+
+    def test_is_bool(self):
+        self.assertTrue(com.is_bool(True))
+        self.assertTrue(com.is_bool(bool(False)))
+        self.assertTrue(com.is_bool(np.bool_(False)))
+
+        self.assertFalse(com.is_bool(1))
+        self.assertFalse(com.is_bool(1.1))
+        self.assertFalse(com.is_bool(1 + 3j))
+        self.assertFalse(com.is_bool(np.int64(1)))
+        self.assertFalse(com.is_bool(np.float64(1.1)))
+        self.assertFalse(com.is_bool(np.complex128(1 + 3j)))
+        self.assertFalse(com.is_bool(np.nan))
+        self.assertFalse(com.is_bool(None))
+        self.assertFalse(com.is_bool('x'))
+        self.assertFalse(com.is_bool(datetime(2011, 1, 1)))
+        self.assertFalse(com.is_bool(np.datetime64('2011-01-01')))
+        self.assertFalse(com.is_bool(pd.Timestamp('2011-01-01')))
+        self.assertFalse(com.is_bool(pd.Timestamp('2011-01-01',
+                                                  tz='US/Eastern')))
+        self.assertFalse(com.is_bool(timedelta(1000)))
+        self.assertFalse(com.is_bool(np.timedelta64(1, 'D')))
+        self.assertFalse(com.is_bool(pd.Timedelta('1 days')))
+
+    def test_is_integer(self):
+        self.assertTrue(com.is_integer(1))
+        self.assertTrue(com.is_integer(np.int64(1)))
+
+        self.assertFalse(com.is_integer(True))
+        self.assertFalse(com.is_integer(1.1))
+        self.assertFalse(com.is_integer(1 + 3j))
+        self.assertFalse(com.is_integer(bool(False)))
+        self.assertFalse(com.is_integer(np.bool_(False)))
+        self.assertFalse(com.is_integer(np.float64(1.1)))
+        self.assertFalse(com.is_integer(np.complex128(1 + 3j)))
+        self.assertFalse(com.is_integer(np.nan))
+        self.assertFalse(com.is_integer(None))
+        self.assertFalse(com.is_integer('x'))
+        self.assertFalse(com.is_integer(datetime(2011, 1, 1)))
+        self.assertFalse(com.is_integer(np.datetime64('2011-01-01')))
+        self.assertFalse(com.is_integer(pd.Timestamp('2011-01-01')))
+        self.assertFalse(com.is_integer(pd.Timestamp('2011-01-01',
+                                                     tz='US/Eastern')))
+        self.assertFalse(com.is_integer(timedelta(1000)))
+        self.assertFalse(com.is_integer(pd.Timedelta('1 days')))
+
+        # questionable: np.timedelta64 counts as an integer
+        self.assertTrue(com.is_integer(np.timedelta64(1, 'D')))
+
+    def test_is_float(self):
+        self.assertTrue(com.is_float(1.1))
+        self.assertTrue(com.is_float(np.float64(1.1)))
+        self.assertTrue(com.is_float(np.nan))
+
+        self.assertFalse(com.is_float(True))
+        self.assertFalse(com.is_float(1))
+        self.assertFalse(com.is_float(1 + 3j))
+        self.assertFalse(com.is_float(bool(False)))
+        self.assertFalse(com.is_float(np.bool_(False)))
+        self.assertFalse(com.is_float(np.int64(1)))
+        self.assertFalse(com.is_float(np.complex128(1 + 3j)))
+        self.assertFalse(com.is_float(None))
+        self.assertFalse(com.is_float('x'))
+        self.assertFalse(com.is_float(datetime(2011, 1, 1)))
+        self.assertFalse(com.is_float(np.datetime64('2011-01-01')))
+        self.assertFalse(com.is_float(pd.Timestamp('2011-01-01')))
+        self.assertFalse(com.is_float(pd.Timestamp('2011-01-01',
+                                                   tz='US/Eastern')))
+        self.assertFalse(com.is_float(timedelta(1000)))
+        self.assertFalse(com.is_float(np.timedelta64(1, 'D')))
+        self.assertFalse(com.is_float(pd.Timedelta('1 days')))
+
+
 def test_downcast_conv():
     # test downcasting