Skip to content

Commit

Permalink
BUG: membership checks on ExtensionArray containing NA values
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Nov 15, 2020
1 parent 8d1b8ab commit 589d0d3
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,7 @@ ExtensionArray
- Fixed bug when applying a NumPy ufunc with multiple outputs to a :class:`pandas.arrays.IntegerArray` returning None (:issue:`36913`)
- Fixed an inconsistency in :class:`PeriodArray`'s ``__init__`` signature to those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`)
- Reductions for :class:`BooleanArray`, :class:`Categorical`, :class:`DatetimeArray`, :class:`FloatingArray`, :class:`IntegerArray`, :class:`PeriodArray`, :class:`TimedeltaArray`, and :class:`PandasArray` are now keyword-only methods (:issue:`37541`)
- Fixed bug where a ``TypeError`` was wrongly raised if a membership check was made on an ``ExtensionArray`` with :class:`NA` values, but without a custom ``__contains__`` method (:issue:`xxxxx`)

Other
^^^^^
Expand Down
14 changes: 13 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.missing import isna, notna

from pandas.core import ops
from pandas.core.algorithms import factorize_array, unique
Expand Down Expand Up @@ -351,6 +351,18 @@ def __iter__(self):
for i in range(len(self)):
yield self[i]

def __contains__(self, item) -> bool:
    """
    Return for `item in self`.

    Parameters
    ----------
    item : object
        The value whose membership is tested.

    Returns
    -------
    bool
        If `item` is a missing-value scalar: True when the array can hold
        missing values and at least one element is missing, else False.
        Otherwise: True when `item` compares equal to one of the
        non-missing elements.
    """
    # Imported locally so this method does not depend on the module's
    # top-level import list.
    from pandas.api.types import is_scalar

    # Comparisons of any item to pd.NA always return pd.NA, so e.g.
    # "a" in [pd.NA] raises a TypeError. The implementation below works
    # around that.
    #
    # Only scalars are NA-checked: isna() on a list-like returns an array,
    # whose truth value is ambiguous and would raise a ValueError here.
    if is_scalar(item) and isna(item):
        if not self._can_hold_na:
            return False
        # .any() yields a numpy bool; convert to honour the annotation.
        return bool(isna(self).any())

    # Drop missing entries first so the element-wise comparisons done by
    # ``in`` never have to take the truth value of pd.NA.
    arr = self[notna(self)] if self._can_hold_na else self
    return item in iter(arr)

def __eq__(self, other: Any) -> ArrayLike:
"""
Return for `self == other` (element-wise equality).
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/arrays/categorical/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,3 +395,15 @@ def test_numeric_like_ops(self):
msg = "Object with dtype category cannot perform the numpy op log"
with pytest.raises(TypeError, match=msg):
np.log(s)

def test_contains(self, ordered):
    """Membership checks on a Categorical, with and without a missing value."""
    # GH-xxxxx
    # Each case: (input values, whether pd.NA should be reported as a member)
    cases = [
        (["a", "b"], False),
        ([np.nan, "a"], True),
    ]
    for values, has_missing in cases:
        cat = Categorical(values, ordered=ordered)
        assert "a" in cat
        assert "x" not in cat
        # pd.NA is a member only when the values include a missing entry
        assert (pd.NA in cat) is has_missing
14 changes: 14 additions & 0 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,3 +345,17 @@ def test_astype_from_float_dtype(dtype):
result = s.astype("string")
expected = pd.Series(["0.1"], dtype="string")
tm.assert_series_equal(result, expected)


def test_contains():
    """Membership checks on a StringArray, with and without pd.NA present."""
    # GH-xxxxx
    without_na = pd.arrays.StringArray(np.array(["a", "b"], dtype=object))
    with_na = pd.arrays.StringArray(np.array(["a", pd.NA]))

    for strings, holds_na in ((without_na, False), (with_na, True)):
        assert "a" in strings
        assert "x" not in strings
        # pd.NA is a member only when the array actually contains it
        assert (pd.NA in strings) is holds_na

0 comments on commit 589d0d3

Please sign in to comment.