From 925237a87a25b5d4af94b4429608d37465d6d51f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 9 Nov 2018 08:40:15 +0100 Subject: [PATCH 01/14] TST: add tests for keeping dtype in Series.update --- pandas/tests/series/test_combine_concat.py | 33 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index a685eb7e9fbd3..f0bbe1ec4f509 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import DataFrame, DatetimeIndex, Series, compat, date_range import pandas.util.testing as tm -from pandas.util.testing import assert_series_equal +from pandas.util.testing import assert_series_equal, assert_frame_equal class TestSeriesCombine(): @@ -116,8 +116,35 @@ def test_update(self): df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) df['c'] = np.nan - # this will fail as long as series is a sub-class of ndarray - # df['c'].update(Series(['foo'],index=[0])) ##### + df['c'].update(Series(['foo'], index=[0])) + expected = DataFrame([[1, np.nan, 'foo'], [3, 2., np.nan]], + columns=['a', 'b', 'c']) + assert_frame_equal(df, expected) + + def test_update_dtypes(self): + s = Series([1., 2., False, True]) + + other = Series([45], index=[0]) + s.update(other) + + expected = Series([45., 2., False, True]) + assert_series_equal(s, expected) + + s = Series([10, 11, 12]) + other = Series([61, 63], index=[1, 3]) + s.update(other) + + expected = Series([10, 61, 12]) + assert_series_equal(s, expected) + + # we always try to keep original dtype, even if other has different one + s.update(other.astype(float)) + assert_series_equal(s, expected) + + # if keeping the dtype is not possible, we allow upcasting + s.update(other + 0.1) + expected = Series([10., 61.1, 12.]) + assert_series_equal(s, expected) def test_concat_empty_series_dtypes_roundtrips(self): From 
9a74273ff79bc720d1dbc2197da623b4bd6208ec Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 9 Nov 2018 20:54:53 +0100 Subject: [PATCH 02/14] Retrigger CI From 3718979b1aa9e31f16159485f425ca41b01ee1e7 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 11 Nov 2018 10:35:57 +0100 Subject: [PATCH 03/14] Review (gfyoung) --- pandas/tests/series/test_combine_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index be40894039299..4535ca2209182 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -13,7 +13,7 @@ from pandas.util.testing import assert_series_equal, assert_frame_equal -class TestSeriesCombine(): +class TestSeriesCombine(object): def test_append(self, datetime_series, string_series, object_series): appendedSeries = string_series.append(object_series) From 31317eace21f142b840ea04e2876b31e64ad2261 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 11 Nov 2018 11:31:05 +0100 Subject: [PATCH 04/14] Fix isort --- pandas/tests/series/test_combine_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 4535ca2209182..34c2246954d68 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import DataFrame, DatetimeIndex, Series, compat, date_range import pandas.util.testing as tm -from pandas.util.testing import assert_series_equal, assert_frame_equal +from pandas.util.testing import assert_frame_equal, assert_series_equal class TestSeriesCombine(object): From 241de9032dee8a5c956f38721b565bd98e54f9a9 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sun, 11 Nov 2018 19:23:58 +0100 Subject: [PATCH 05/14] Review (jreback) --- pandas/tests/series/test_combine_concat.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 34c2246954d68..ffeb22d5b3fc1 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -123,28 +123,30 @@ def test_update(self): def test_update_dtypes(self): s = Series([1., 2., False, True]) - other = Series([45], index=[0]) s.update(other) expected = Series([45., 2., False, True]) assert_series_equal(s, expected) - s = Series([10, 11, 12]) + t_src = Series([10, 11, 12]) + t = t_src.copy() other = Series([61, 63], index=[1, 3]) - s.update(other) + t.update(other) expected = Series([10, 61, 12]) - assert_series_equal(s, expected) + assert_series_equal(t, expected) # we always try to keep original dtype, even if other has different one - s.update(other.astype(float)) - assert_series_equal(s, expected) + t = t_src.copy() + t.update(other.astype(float)) + assert_series_equal(t, expected) # if keeping the dtype is not possible, we allow upcasting - s.update(other + 0.1) + t = t_src.copy() + t.update(other + 0.1) expected = Series([10., 61.1, 12.]) - assert_series_equal(s, expected) + assert_series_equal(t, expected) def test_concat_empty_series_dtypes_roundtrips(self): From 528f36f31496638a48b6b9f5661240f958eb4693 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 11 Nov 2018 22:30:02 +0100 Subject: [PATCH 06/14] Retrigger CI after flaky hypothesis failure From c180b48d1bd0db58f128be102bcdfc656e8f4d71 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Mon, 12 Nov 2018 19:19:47 +0100 Subject: [PATCH 07/14] Review (jreback) --- pandas/tests/series/test_combine_concat.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index ffeb22d5b3fc1..ce2a1211dcbd9 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -129,24 +129,24 @@ def test_update_dtypes(self): expected = Series([45., 2., False, True]) assert_series_equal(s, expected) - t_src = Series([10, 11, 12]) - t = t_src.copy() + s = Series([10, 11, 12]) + s_copy = s.copy() other = Series([61, 63], index=[1, 3]) - t.update(other) + s_copy.update(other) expected = Series([10, 61, 12]) - assert_series_equal(t, expected) + assert_series_equal(s_copy, expected) # we always try to keep original dtype, even if other has different one - t = t_src.copy() - t.update(other.astype(float)) - assert_series_equal(t, expected) + s_copy = s.copy() + s_copy.update(other.astype(float)) + assert_series_equal(s_copy, expected) # if keeping the dtype is not possible, we allow upcasting - t = t_src.copy() - t.update(other + 0.1) + s_copy = s.copy() + s_copy.update(other + 0.1) expected = Series([10., 61.1, 12.]) - assert_series_equal(t, expected) + assert_series_equal(s_copy, expected) def test_concat_empty_series_dtypes_roundtrips(self): From 437a0250af7d3516d40bfe55aee9002ea83c6884 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Thu, 15 Nov 2018 00:21:24 +0100 Subject: [PATCH 08/14] Review (gfyoung) --- pandas/tests/series/test_combine_concat.py | 41 ++++++++++------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index ce2a1211dcbd9..307e87f077837 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -121,33 +121,28 @@ def test_update(self): columns=['a', 'b', 'c']) assert_frame_equal(df, expected) - def test_update_dtypes(self): - s = Series([1., 2., False, True]) - other = Series([45], index=[0]) + @pytest.mark.parametrize('other_values', [ + [61, 63], # int + [61., 63.], # float, but can be cast to int + [61.1, 63.1], # float, cannot be cast to int + [(61,), (63,)] # object + ], ids=['int', 'float_castable', 'float', 'object']) + @pytest.mark.parametrize('caller_dtype', ['int', 'float', object]) + def test_update_dtypes(self, caller_dtype, other_values): + caller_values = [10, 11, 12] + s = Series(caller_values, dtype=caller_dtype) + other = Series(other_values, index=[1, 3]) s.update(other) - expected = Series([45., 2., False, True]) + expected_values = [caller_values[0], other_values[0], caller_values[2]] + try: + # we keep original dtype whenever possible + expected = Series(expected_values, dtype=caller_dtype) + except ValueError: + expected = Series(expected_values) + print(s, expected) assert_series_equal(s, expected) - s = Series([10, 11, 12]) - s_copy = s.copy() - other = Series([61, 63], index=[1, 3]) - s_copy.update(other) - - expected = Series([10, 61, 12]) - assert_series_equal(s_copy, expected) - - # we always try to keep original dtype, even if other has different one - s_copy = s.copy() - s_copy.update(other.astype(float)) - assert_series_equal(s_copy, expected) - - # if keeping the dtype is not possible, we allow upcasting - s_copy = s.copy() - s_copy.update(other + 0.1) - expected = 
Series([10., 61.1, 12.]) - assert_series_equal(s_copy, expected) - def test_concat_empty_series_dtypes_roundtrips(self): # round-tripping with self & like self From be15e28c77d57c0f7d0bcdecae39ea89846159a1 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 15 Nov 2018 01:20:22 +0100 Subject: [PATCH 09/14] Remove stray print statement --- pandas/tests/series/test_combine_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 307e87f077837..e7320a50bd211 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -140,7 +140,7 @@ def test_update_dtypes(self, caller_dtype, other_values): expected = Series(expected_values, dtype=caller_dtype) except ValueError: expected = Series(expected_values) - print(s, expected) + assert_series_equal(s, expected) def test_concat_empty_series_dtypes_roundtrips(self): From b736c4bc1a0f44180c957c3884a18f5438447b56 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sun, 18 Nov 2018 14:23:04 +0100 Subject: [PATCH 10/14] Add test without try-catch for demo purposes --- pandas/tests/series/test_combine_concat.py | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index e7320a50bd211..fb09044bef19a 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -143,6 +143,32 @@ def test_update_dtypes(self, caller_dtype, other_values): assert_series_equal(s, expected) + @pytest.mark.parametrize('other_values, caller_dtype, expected', [ + # other_values is int + ([61, 63], 'int64', pd.Series([10, 61, 12])), + ([61, 63], float, pd.Series([10., 61., 12.])), + ([61, 63], object, pd.Series([10, 61, 12], dtype=object)), + # other_values is float, but can be cast to int + ([61., 63.], 'int64', pd.Series([10, 61, 12], dtype='int64')), + ([61., 63.], float, pd.Series([10., 61., 12.])), + ([61., 63.], object, pd.Series([10, 61., 12], dtype=object)), + # other_values is float, cannot be cast to int + ([61.1, 63.1], 'int64', pd.Series([10., 61.1, 12.])), + ([61.1, 63.1], float, pd.Series([10., 61.1, 12.])), + ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)), + # other_values is object, cannot be cast + ([(61,), (63,)], 'int64', pd.Series([10, (61,), 12])), + ([(61,), (63,)], float, pd.Series([10., (61,), 12.])), + ([(61,), (63,)], object, pd.Series([10, (61,), 12])) + ]) + def test_update_dtypes_no_try_catch(self, other_values, caller_dtype, expected): + caller_values = [10, 11, 12] + s = Series(caller_values, dtype=caller_dtype) + other = Series(other_values, index=[1, 3]) + s.update(other) + + assert_series_equal(s, expected) + def test_concat_empty_series_dtypes_roundtrips(self): # round-tripping with self & like self From 8f78023913ca465754f76b5aa3fc765bbaf394d1 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sun, 18 Nov 2018 15:12:25 +0100 Subject: [PATCH 11/14] Lint --- pandas/tests/series/test_combine_concat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index fb09044bef19a..54cabf2ce32e8 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -159,9 +159,10 @@ def test_update_dtypes(self, caller_dtype, other_values): # other_values is object, cannot be cast ([(61,), (63,)], 'int64', pd.Series([10, (61,), 12])), ([(61,), (63,)], float, pd.Series([10., (61,), 12.])), - ([(61,), (63,)], object, pd.Series([10, (61,), 12])) + ([(61,), (63,)], object, pd.Series([10, (61,), 12])) ]) - def test_update_dtypes_no_try_catch(self, other_values, caller_dtype, expected): + def test_update_dtypes_no_try_catch(self, other_values, + caller_dtype, expected): caller_values = [10, 11, 12] s = Series(caller_values, dtype=caller_dtype) other = Series(other_values, index=[1, 3]) From a5610a2a61fd1a2a07c221dff6ee521326b3dd88 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sun, 18 Nov 2018 15:16:38 +0100 Subject: [PATCH 12/14] Easily extend dtype coverage of test with try-catch --- pandas/tests/series/test_combine_concat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 54cabf2ce32e8..51023b8140c45 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -127,7 +127,8 @@ def test_update(self): [61.1, 63.1], # float, cannot be cast to int [(61,), (63,)] # object ], ids=['int', 'float_castable', 'float', 'object']) - @pytest.mark.parametrize('caller_dtype', ['int', 'float', object]) + @pytest.mark.parametrize('caller_dtype', ['int32', 'int64', + 'float32', 'float64', object]) def test_update_dtypes(self, caller_dtype, other_values): caller_values = [10, 11, 12] s = Series(caller_values, dtype=caller_dtype) From 9168f4e811550e515226160a281b24c9eb7bb540 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 18 Nov 2018 17:16:33 +0100 Subject: [PATCH 13/14] Lint --- pandas/tests/series/test_combine_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 51023b8140c45..77ae665d9cc99 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -160,7 +160,7 @@ def test_update_dtypes(self, caller_dtype, other_values): # other_values is object, cannot be cast ([(61,), (63,)], 'int64', pd.Series([10, (61,), 12])), ([(61,), (63,)], float, pd.Series([10., (61,), 12.])), - ([(61,), (63,)], object, pd.Series([10, (61,), 12])) + ([(61,), (63,)], object, pd.Series([10, (61,), 12])) ]) def test_update_dtypes_no_try_catch(self, other_values, caller_dtype, expected): From 0f50a25215a1a22c6f93ea063580f3f21f723aae Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Mon, 19 Nov 2018 18:35:33 +0100 Subject: [PATCH 14/14] Review (jreback) --- pandas/tests/series/test_combine_concat.py | 48 +++++++--------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 77ae665d9cc99..e13cb9edffe2b 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -121,52 +121,32 @@ def test_update(self): columns=['a', 'b', 'c']) assert_frame_equal(df, expected) - @pytest.mark.parametrize('other_values', [ - [61, 63], # int - [61., 63.], # float, but can be cast to int - [61.1, 63.1], # float, cannot be cast to int - [(61,), (63,)] # object - ], ids=['int', 'float_castable', 'float', 'object']) - @pytest.mark.parametrize('caller_dtype', ['int32', 'int64', - 'float32', 'float64', object]) - def test_update_dtypes(self, caller_dtype, other_values): - caller_values = [10, 11, 12] - s = Series(caller_values, dtype=caller_dtype) - other = Series(other_values, index=[1, 3]) - s.update(other) - - expected_values = [caller_values[0], other_values[0], caller_values[2]] - try: - # we keep original dtype whenever possible - expected = Series(expected_values, dtype=caller_dtype) - except ValueError: - expected = Series(expected_values) - - assert_series_equal(s, expected) - - @pytest.mark.parametrize('other_values, caller_dtype, expected', [ - # other_values is int + @pytest.mark.parametrize('other, dtype, expected', [ + # other is int + ([61, 63], 'int32', pd.Series([10, 61, 12], dtype='int32')), ([61, 63], 'int64', pd.Series([10, 61, 12])), ([61, 63], float, pd.Series([10., 61., 12.])), ([61, 63], object, pd.Series([10, 61, 12], dtype=object)), - # other_values is float, but can be cast to int - ([61., 63.], 'int64', pd.Series([10, 61, 12], dtype='int64')), + # other is float, but can be cast to int + ([61., 63.], 'int32', pd.Series([10, 61, 12], dtype='int32')), + ([61., 63.], 'int64', 
pd.Series([10, 61, 12])), ([61., 63.], float, pd.Series([10., 61., 12.])), ([61., 63.], object, pd.Series([10, 61., 12], dtype=object)), - # other_values is float, cannot be cast to int + # other is float, cannot be cast to int + ([61.1, 63.1], 'int32', pd.Series([10., 61.1, 12.])), ([61.1, 63.1], 'int64', pd.Series([10., 61.1, 12.])), ([61.1, 63.1], float, pd.Series([10., 61.1, 12.])), ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)), - # other_values is object, cannot be cast + # other is object, cannot be cast + ([(61,), (63,)], 'int32', pd.Series([10, (61,), 12])), ([(61,), (63,)], 'int64', pd.Series([10, (61,), 12])), ([(61,), (63,)], float, pd.Series([10., (61,), 12.])), ([(61,), (63,)], object, pd.Series([10, (61,), 12])) ]) - def test_update_dtypes_no_try_catch(self, other_values, - caller_dtype, expected): - caller_values = [10, 11, 12] - s = Series(caller_values, dtype=caller_dtype) - other = Series(other_values, index=[1, 3]) + def test_update_dtypes(self, other, dtype, expected): + + s = Series([10, 11, 12], dtype=dtype) + other = Series(other, index=[1, 3]) s.update(other) assert_series_equal(s, expected)