Skip to content

Commit

Permalink
API: CategoricalIndex.append fallback to concat_compat (#38098)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Nov 29, 2020
1 parent ce0efe8 commit 22007d3
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 53 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,7 @@ Other API changes
- Passing an invalid ``fill_value`` to :meth:`Series.shift` with a ``CategoricalDtype`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
- Passing an invalid value to :meth:`IntervalIndex.insert` or :meth:`CategoricalIndex.insert` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
- Attempting to reindex a Series with a :class:`CategoricalIndex` with an invalid ``fill_value`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
- :meth:`CategoricalIndex.append` with an index that contains non-category values will now cast (typically to object dtype) and return a non-categorical :class:`Index` instead of raising ``TypeError`` (:issue:`38098`)

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -635,6 +636,7 @@ Indexing
- Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`)
- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` raising ``KeyError`` when columns were :class:`MultiIndex` with only one level (:issue:`29749`)
- Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`)
- Bug in setting a new label on a :class:`DataFrame` or :class:`Series` with a :class:`CategoricalIndex` incorrectly raising ``TypeError`` when the new label is not among the index's categories (:issue:`38098`)

Missing
^^^^^^^
Expand Down
6 changes: 0 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4180,12 +4180,6 @@ def _coerce_scalar_to_index(self, item):

return Index([item], dtype=dtype, **self._get_attributes_dict())

def _to_safe_for_reshape(self):
"""
Convert to object if we are a categorical.
"""
# This version performs no conversion: it hands back ``self`` unchanged.
# The reshape code in pandas/core/reshape/pivot.py calls this hook on an
# axis before retrying an append/assignment that failed.
return self

def _validate_fill_value(self, value):
"""
Check if the value can be inserted into our array, and convert
Expand Down
20 changes: 12 additions & 8 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,10 +399,6 @@ def unique(self, level=None):
# of result, not self.
return type(self)._simple_new(result, name=self.name)

def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
# Cast the whole index to object dtype. The pivot margin code calls this
# after an append/assignment on the categorical axis raised, then retries
# the same operation on the converted axis.
return self.astype("object")

def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
"""
Create index with target's values (move/add/delete values as necessary)
Expand Down Expand Up @@ -637,11 +633,19 @@ def map(self, mapper):
mapped = self._values.map(mapper)
return Index(mapped, name=self.name)

# NOTE(review): this rendering lost the diff +/- markers and the indentation.
# The first signature below (returning "CategoricalIndex") and the three
# statements that sit directly before ``try`` look like the pre-change
# version of this method; the second signature and the try/except/else look
# like the post-change version — confirm against the commit.
def _concat(self, to_concat: List["Index"], name: Label) -> "CategoricalIndex":
def _concat(self, to_concat: List["Index"], name: Label) -> Index:
# The return annotation is the general Index because the fallback branch
# below builds its result with ``Index(res, name=name)``.
# if calling index is category, don't check dtype of others
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=name)
try:
# Fast path: take the ``.codes`` of whatever ``_is_dtype_compat`` returns
# for each piece and join those code arrays into one.
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
except TypeError:
# not all to_concat elements are among our categories (or NA)
# Fall back to the generic concat helper and wrap its result in an Index
# instead of propagating the TypeError to the caller.
from pandas.core.dtypes.concat import concat_compat

res = concat_compat(to_concat)
return Index(res, name=name)
else:
# Every piece was compatible: wrap the combined codes via our backing
# data and build the result with our own index type.
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=name)

def _delegate_method(self, name: str, *args, **kwargs):
""" method delegation to the ._values """
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1684,10 +1684,6 @@ def unique(self, level=None):
level = self._get_level_number(level)
return self._get_level_values(level=level, unique=True)

def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
# Delegate level by level: call ``_to_safe_for_reshape`` on every entry of
# ``self.levels`` and pass the resulting list to ``set_levels``.
return self.set_levels([i._to_safe_for_reshape() for i in self.levels])

def to_frame(self, index=True, name=None):
"""
Create a DataFrame with the levels of the MultiIndex as columns.
Expand Down
31 changes: 8 additions & 23 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,19 +268,13 @@ def _add_margins(
margin_dummy = DataFrame(row_margin, columns=[key]).T

row_names = result.index.names
try:
# check the result column and leave floats
for dtype in set(result.dtypes):
cols = result.select_dtypes([dtype]).columns
margin_dummy[cols] = margin_dummy[cols].apply(
maybe_downcast_to_dtype, args=(dtype,)
)
result = result.append(margin_dummy)
except TypeError:

# we cannot reshape, so coerce the axis
result.index = result.index._to_safe_for_reshape()
result = result.append(margin_dummy)
# check the result column and leave floats
for dtype in set(result.dtypes):
cols = result.select_dtypes([dtype]).columns
margin_dummy[cols] = margin_dummy[cols].apply(
maybe_downcast_to_dtype, args=(dtype,)
)
result = result.append(margin_dummy)
result.index.names = row_names

return result
Expand Down Expand Up @@ -328,16 +322,7 @@ def _all_key(key):

# we are going to mutate this, so need to copy!
piece = piece.copy()
try:
piece[all_key] = margin[key]
except ValueError:
# we cannot reshape, so coerce the axis
piece.set_axis(
piece._get_axis(cat_axis)._to_safe_for_reshape(),
axis=cat_axis,
inplace=True,
)
piece[all_key] = margin[key]
piece[all_key] = margin[key]

table_pieces.append(piece)
margin_keys.append(all_key)
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ def test_append(self):
expected = CategoricalIndex(list("aabbcaca"), categories=categories)
tm.assert_index_equal(result, expected, exact=True)

# invalid objects
msg = "cannot append a non-category item to a CategoricalIndex"
with pytest.raises(TypeError, match=msg):
ci.append(Index(["a", "d"]))
# invalid objects -> cast to object via concat_compat
result = ci.append(Index(["a", "d"]))
expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
tm.assert_index_equal(result, expected, exact=True)

# GH14298 - if base object is not categorical -> coerce to object
result = Index(["c", "a"]).append(ci)
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/indexing/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@ def test_loc_scalar(self):
with pytest.raises(KeyError, match=r"^'d'$"):
df.loc["d"]

msg = "cannot append a non-category item to a CategoricalIndex"
with pytest.raises(TypeError, match=msg):
df.loc["d"] = 10
df2 = df.copy()
expected = df2.copy()
expected.index = expected.index.astype(object)
expected.loc["d"] = 10
df2.loc["d"] = 10
tm.assert_frame_equal(df2, expected)

msg = "'fill_value=d' is not present in this Categorical's categories"
with pytest.raises(TypeError, match=msg):
Expand Down
14 changes: 9 additions & 5 deletions pandas/tests/reshape/concat/test_categorical.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

Expand Down Expand Up @@ -137,13 +136,18 @@ def test_categorical_index_preserver(self):
).set_index("B")
tm.assert_frame_equal(result, expected)

# wrong categories
# wrong categories -> uses concat_compat, which casts to object
df3 = DataFrame(
{"A": a, "B": Categorical(b, categories=list("abe"))}
).set_index("B")
msg = "categories must match existing categories when appending"
with pytest.raises(TypeError, match=msg):
pd.concat([df2, df3])
result = pd.concat([df2, df3])
expected = pd.concat(
[
df2.set_axis(df2.index.astype(object), 0),
df3.set_axis(df3.index.astype(object), 0),
]
)
tm.assert_frame_equal(result, expected)

def test_concat_categorical_tz(self):
# GH-23816
Expand Down

0 comments on commit 22007d3

Please sign in to comment.