From 22007d3fd294033132be4fffb0fbdd10c8dd46de Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 29 Nov 2020 07:59:56 -0800 Subject: [PATCH] API: CategoricalIndex.append fallback to concat_compat (#38098) --- doc/source/whatsnew/v1.2.0.rst | 2 ++ pandas/core/indexes/base.py | 6 ---- pandas/core/indexes/category.py | 20 +++++++----- pandas/core/indexes/multi.py | 4 --- pandas/core/reshape/pivot.py | 31 +++++-------------- .../indexes/categorical/test_category.py | 8 ++--- pandas/tests/indexing/test_categorical.py | 9 ++++-- .../tests/reshape/concat/test_categorical.py | 14 ++++++--- 8 files changed, 41 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f53cde7fac068..501e2878ab135 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -454,6 +454,7 @@ Other API changes - Passing an invalid ``fill_value`` to :meth:`Series.shift` with a ``CategoricalDtype`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) - Passing an invalid value to :meth:`IntervalIndex.insert` or :meth:`CategoricalIndex.insert` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) - Attempting to reindex a Series with a :class:`CategoricalIndex` with an invalid ``fill_value`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) +- :meth:`CategoricalIndex.append` with an index that contains non-category values will now cast instead of raising ``TypeError`` (:issue:`38098`) .. --------------------------------------------------------------------------- @@ -635,6 +636,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`) - Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` raising ``KeyError`` when columns were :class:`MultiIndex` with only one level (:issue:`29749`) - Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) +- Bug in setting a new label on a :class:`DataFrame` or :class:`Series` with a :class:`CategoricalIndex` incorrectly raising ``TypeError`` when the new label is not among the index's categories (:issue:`38098`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c49f3f9457161..c86652acbcd0f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4180,12 +4180,6 @@ def _coerce_scalar_to_index(self, item): return Index([item], dtype=dtype, **self._get_attributes_dict()) - def _to_safe_for_reshape(self): - """ - Convert to object if we are a categorical. - """ - return self - def _validate_fill_value(self, value): """ Check if the value can be inserted into our array, and convert diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7956b3a623333..abf70fd150345 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -399,10 +399,6 @@ def unique(self, level=None): # of result, not self. return type(self)._simple_new(result, name=self.name) - def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ - return self.astype("object") - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ Create index with target's values (move/add/delete values as necessary) @@ -637,11 +633,19 @@ def map(self, mapper): mapped = self._values.map(mapper) return Index(mapped, name=self.name) - def _concat(self, to_concat: List["Index"], name: Label) -> "CategoricalIndex": + def _concat(self, to_concat: List["Index"], name: Label) -> Index: # if calling index is category, don't check dtype of others - codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) - cat = self._data._from_backing_data(codes) - return type(self)._simple_new(cat, name=name) + try: + codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) + except TypeError: + # not all to_concat elements are among our categories (or NA) + from pandas.core.dtypes.concat import concat_compat + + res = concat_compat(to_concat) + return Index(res, name=name) + else: + cat = self._data._from_backing_data(codes) + return type(self)._simple_new(cat, name=name) def _delegate_method(self, name: str, *args, **kwargs): """ method delegation to the ._values """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dacd802b21e63..46846209f315b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1684,10 +1684,6 @@ def unique(self, level=None): level = self._get_level_number(level) return self._get_level_values(level=level, unique=True) - def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ - return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) - def to_frame(self, index=True, name=None): """ Create a DataFrame with the levels of the MultiIndex as columns. diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 22887cede51ed..40496a5b8671b 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -268,19 +268,13 @@ def _add_margins( margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names - try: - # check the result column and leave floats - for dtype in set(result.dtypes): - cols = result.select_dtypes([dtype]).columns - margin_dummy[cols] = margin_dummy[cols].apply( - maybe_downcast_to_dtype, args=(dtype,) - ) - result = result.append(margin_dummy) - except TypeError: - - # we cannot reshape, so coerce the axis - result.index = result.index._to_safe_for_reshape() - result = result.append(margin_dummy) + # check the result column and leave floats + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = margin_dummy[cols].apply( + maybe_downcast_to_dtype, args=(dtype,) + ) + result = result.append(margin_dummy) result.index.names = row_names return result @@ -328,16 +322,7 @@ def _all_key(key): # we are going to mutate this, so need to copy! piece = piece.copy() - try: - piece[all_key] = margin[key] - except ValueError: - # we cannot reshape, so coerce the axis - piece.set_axis( - piece._get_axis(cat_axis)._to_safe_for_reshape(), - axis=cat_axis, - inplace=True, - ) - piece[all_key] = margin[key] + piece[all_key] = margin[key] table_pieces.append(piece) margin_keys.append(all_key) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 2e03c00638a5c..3bab57e1d265e 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -57,10 +57,10 @@ def test_append(self): expected = CategoricalIndex(list("aabbcaca"), categories=categories) tm.assert_index_equal(result, expected, exact=True) - # invalid objects - msg = "cannot append a non-category item to a CategoricalIndex" - with pytest.raises(TypeError, match=msg): - ci.append(Index(["a", "d"])) + # invalid objects -> cast to object via concat_compat + result = ci.append(Index(["a", "d"])) + expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"]) + tm.assert_index_equal(result, expected, exact=True) # GH14298 - if base object is not categorical -> coerce to object result = Index(["c", "a"]).append(ci) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6fff706e27cd2..1b9b6452b2e33 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -57,9 +57,12 @@ def test_loc_scalar(self): with pytest.raises(KeyError, match=r"^'d'$"): df.loc["d"] - msg = "cannot append a non-category item to a CategoricalIndex" - with pytest.raises(TypeError, match=msg): - df.loc["d"] = 10 + df2 = df.copy() + expected = df2.copy() + expected.index = expected.index.astype(object) + expected.loc["d"] = 10 + df2.loc["d"] = 10 + tm.assert_frame_equal(df2, expected) msg = "'fill_value=d' is not present in this Categorical's categories" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index 388575c5a3b86..6dae28003d3b6 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from pandas.core.dtypes.dtypes import CategoricalDtype @@ -137,13 +136,18 @@ def test_categorical_index_preserver(self): ).set_index("B") tm.assert_frame_equal(result, expected) - # wrong categories + # wrong categories -> uses concat_compat, which casts to object df3 = DataFrame( {"A": a, "B": Categorical(b, categories=list("abe"))} ).set_index("B") - msg = "categories must match existing categories when appending" - with pytest.raises(TypeError, match=msg): - pd.concat([df2, df3]) + result = pd.concat([df2, df3]) + expected = pd.concat( + [ + df2.set_axis(df2.index.astype(object), 0), + df3.set_axis(df3.index.astype(object), 0), + ] + ) + tm.assert_frame_equal(result, expected) def test_concat_categorical_tz(self): # GH-23816