From 712e871c4b5a6d12ed1c1ac47f6792b3ef33b113 Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Mon, 4 Nov 2019 09:20:49 +0000 Subject: [PATCH 01/12] Fix indexing.setitem when type is pd.Categorical --- pandas/core/internals/blocks.py | 8 ++++++++ pandas/tests/frame/test_indexing.py | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 51108d9a5a573..5baaec73f094f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -883,6 +883,14 @@ def setitem(self, indexer, value): ): values[indexer] = value try: + # GH25495 may need to convert to categorical block + if self.is_categorical_astype( + arr_value.dtype + ) and not is_categorical_dtype(values): + return self.make_block( + Categorical(self.values, dtype=arr_value.dtype) + ) + values = values.astype(arr_value.dtype) except ValueError: pass diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index e215c90d2eb04..4c6c93c9ca86a 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -204,6 +204,15 @@ def test_setitem_list_of_tuples(self, float_frame): expected = Series(tuples, index=float_frame.index, name="tuples") tm.assert_series_equal(result, expected) + def test_setitem_single_row_categorical(self): + df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) + categories = pd.Categorical(df["Alpha"], categories=["a", "b", "c"]) + df.loc[:, "Alpha"] = categories + + result = df["Alpha"] + expected = Series(categories, index=df.index, name="Alpha") + tm.assert_series_equal(result, expected) + def test_setitem_mulit_index(self): # GH7655, test that assigning to a sub-frame of a frame # with multi-index columns aligns both rows and columns From a23b3c9782a910009c2e37c8a6c7d5ddf544ebd5 Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Wed, 6 Nov 2019 01:16:31 +0000 Subject: [PATCH 02/12] Move categorical casting check further up --- 
pandas/core/internals/blocks.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5baaec73f094f..59eddffac9ed2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -828,6 +828,14 @@ def setitem(self, indexer, value): if lib.is_scalar(value): value = convert_scalar(values, value) + if ( + hasattr(value, "dtype") + and self.is_categorical_astype(value.dtype) + and not is_categorical_dtype(values) + ): + values[indexer] = value + return self.make_block(Categorical(self.values, dtype=value.dtype)) + else: # current dtype cannot store value, coerce to common dtype find_dtype = False @@ -883,13 +891,6 @@ def setitem(self, indexer, value): ): values[indexer] = value try: - # GH25495 may need to convert to categorical block - if self.is_categorical_astype( - arr_value.dtype - ) and not is_categorical_dtype(values): - return self.make_block( - Categorical(self.values, dtype=arr_value.dtype) - ) values = values.astype(arr_value.dtype) except ValueError: From 550d68d04b666d89b4a84b767f56a70e063a9685 Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Wed, 6 Nov 2019 15:16:46 +0000 Subject: [PATCH 03/12] Move block creation to after we've checked for exact match in shape --- pandas/core/internals/blocks.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 59eddffac9ed2..52731a303eca7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -828,14 +828,6 @@ def setitem(self, indexer, value): if lib.is_scalar(value): value = convert_scalar(values, value) - if ( - hasattr(value, "dtype") - and self.is_categorical_astype(value.dtype) - and not is_categorical_dtype(values) - ): - values[indexer] = value - return self.make_block(Categorical(self.values, dtype=value.dtype)) - else: # current dtype cannot store value, 
coerce to common dtype find_dtype = False @@ -890,6 +882,14 @@ def setitem(self, indexer, value): and arr_value.size == values.size ): values[indexer] = value + + if self.is_categorical_astype(arr_value.dtype) and not is_categorical_dtype( + values + ): + # GH25495 - If the current dtype is not categorical, + # we need to create a new categorical block + return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) + try: values = values.astype(arr_value.dtype) From c945ab118224599ad9d75e9756d3cd6eec41643b Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Wed, 6 Nov 2019 15:18:09 +0000 Subject: [PATCH 04/12] Remove unused whitespace --- pandas/core/internals/blocks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 52731a303eca7..5d8ae761c752e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -891,7 +891,6 @@ def setitem(self, indexer, value): return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) try: - values = values.astype(arr_value.dtype) except ValueError: pass From 5d477541e362252528a953b9790eb57de862afad Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Thu, 7 Nov 2019 14:42:51 +0000 Subject: [PATCH 05/12] Move test to TestDataFrameIndexingCategorical, add issue number --- pandas/tests/frame/test_indexing.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 4c6c93c9ca86a..ba7a4e2607a04 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -204,15 +204,6 @@ def test_setitem_list_of_tuples(self, float_frame): expected = Series(tuples, index=float_frame.index, name="tuples") tm.assert_series_equal(result, expected) - def test_setitem_single_row_categorical(self): - df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) - categories = pd.Categorical(df["Alpha"], 
categories=["a", "b", "c"]) - df.loc[:, "Alpha"] = categories - - result = df["Alpha"] - expected = Series(categories, index=df.index, name="Alpha") - tm.assert_series_equal(result, expected) - def test_setitem_mulit_index(self): # GH7655, test that assigning to a sub-frame of a frame # with multi-index columns aligns both rows and columns @@ -3858,6 +3849,16 @@ def test_assigning_ops(self): df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp) + def test_setitem_single_row_categorical(self): + # GH 25495 + df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) + categories = pd.Categorical(df["Alpha"], categories=["a", "b", "c"]) + df.loc[:, "Alpha"] = categories + + result = df["Alpha"] + expected = Series(categories, index=df.index, name="Alpha") + tm.assert_series_equal(result, expected) + def test_functions_no_warnings(self): df = DataFrame({"value": np.random.randint(0, 100, 20)}) labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] From 2dfa594ebbe92def595c0b3c8acf2ee5ddde5442 Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Fri, 8 Nov 2019 16:56:19 +0000 Subject: [PATCH 06/12] Move checks to separate elif --- pandas/core/internals/blocks.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5d8ae761c752e..737dc4f637514 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -864,7 +864,11 @@ def setitem(self, indexer, value): # length checking check_setitem_lengths(indexer, value, values) - + exact_match = ( + len(arr_value.shape) + and arr_value.shape[0] == values.shape[0] + and arr_value.size == values.size + ) if is_empty_indexer(indexer, arr_value): # GH#8669 empty indexers pass @@ -874,21 +878,20 @@ def setitem(self, indexer, value): # be e.g. 
a list; see GH#6043 values[indexer] = value - # if we are an exact match (ex-broadcasting), - # then use the resultant dtype elif ( - len(arr_value.shape) - and arr_value.shape[0] == values.shape[0] - and arr_value.size == values.size + self.is_categorical_astype(arr_value.dtype) + and not is_categorical_dtype(values) + and exact_match ): + # GH25495 - If the current dtype is not categorical, + # we need to create a new categorical block values[indexer] = value + return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) - if self.is_categorical_astype(arr_value.dtype) and not is_categorical_dtype( - values - ): - # GH25495 - If the current dtype is not categorical, - # we need to create a new categorical block - return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) + # if we are an exact match (ex-broadcasting), + # then use the resultant dtype + elif exact_match: + values[indexer] = value try: values = values.astype(arr_value.dtype) From 8c5a88ef742c6d88670f36edc40ab8a75be11ef7 Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Sat, 16 Nov 2019 23:56:32 +0000 Subject: [PATCH 07/12] Update whatsnew entry --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index cd012fe755337..28544555bfd19 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -367,7 +367,7 @@ Indexing - Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`) - :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`) - :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`) -- +- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`) Missing ^^^^^^^ From 
2e958531c088868c0b94a2030045a39808b9592c Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Mon, 25 Nov 2019 16:43:03 +0000 Subject: [PATCH 08/12] Move tests to tests/frame/indexing/test_categorical.py --- pandas/tests/frame/indexing/test_categorical.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index b595e48797d41..314359fc22cc4 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -354,6 +354,16 @@ def test_functions_no_warnings(self): df.value, range(0, 105, 10), right=False, labels=labels ) + def test_setitem_single_row_categorical(self): + # GH 25495 + df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) + categories = pd.Categorical(df["Alpha"], categories=["a", "b", "c"]) + df.loc[:, "Alpha"] = categories + + result = df["Alpha"] + expected = Series(categories, index=df.index, name="Alpha") + tm.assert_series_equal(result, expected) + def test_loc_indexing_preserves_index_category_dtype(self): # GH 15166 df = DataFrame( From 39c95f41515c5bec6b4b2a1527f9c599e446b388 Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Mon, 25 Nov 2019 16:44:51 +0000 Subject: [PATCH 09/12] Fix Pep8 Warning --- pandas/tests/frame/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 29a698d29bfa3..9a7cd4ace686f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -2888,4 +2888,4 @@ def test_transpose(self, uint64_frame): result = uint64_frame.T expected = DataFrame(uint64_frame.values.T) expected.index = ["A", "B"] - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) From 4257fe8a5fe688e1959674a7bdc99814e2cc9d0e Mon Sep 17 00:00:00 2001 From: Kee Chong Tan 
Date: Mon, 2 Dec 2019 14:57:32 +0000 Subject: [PATCH 10/12] use is_categorical_dtype(arr_value.dtype), as is_categorical_astype is not necessary --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 966258d965681..aa49791c92cc0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -867,7 +867,7 @@ def setitem(self, indexer, value): values[indexer] = value elif ( - self.is_categorical_astype(arr_value.dtype) + is_categorical_dtype(arr_value.dtype) and not is_categorical_dtype(values) and exact_match ): From 551211918629787a6c03077260051b65ecbf8bca Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Sun, 29 Dec 2019 19:07:06 +0000 Subject: [PATCH 11/12] put exact_match as the first condition --- pandas/core/internals/blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b18a129fbbfef..27d533e9602d1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -865,9 +865,9 @@ def setitem(self, indexer, value): values[indexer] = value elif ( - is_categorical_dtype(arr_value.dtype) + exact_match + and is_categorical_dtype(arr_value.dtype) and not is_categorical_dtype(values) - and exact_match ): # GH25495 - If the current dtype is not categorical, # we need to create a new categorical block From 241bd7c9af552b566896f5b93c4bb67912b4129e Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Mon, 13 Jan 2020 13:38:19 +0000 Subject: [PATCH 12/12] Remove merge conflict comment --- doc/source/whatsnew/v1.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 655b1196fc669..fb7fe9712326f 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1037,7 +1037,6 @@ Indexing - :meth:`MultiIndex.get_loc` can't find 
missing values when input includes missing values (:issue:`19132`) - Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) - Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`) ->>>>>>> upstream/master Missing ^^^^^^^