Skip to content

Commit

Permalink
BUG: Index.get_indexer_non_unique misbehaves when index contains mult…
Browse files Browse the repository at this point in the history
…iple nan (pandas-dev#35392)
  • Loading branch information
alexhlim committed Jul 31, 2020
1 parent 04e9e0a commit d041117
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
11 changes: 10 additions & 1 deletion pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,15 @@ cdef class IndexEngine:
Py_ssize_t i, j, n, n_t, n_alloc

self._ensure_mapping_populated()
values = np.array(self._get_index_values(), copy=False)
# GH #35392 Index.get_indexer_non_unique misbehaves when index contains multiple nan
if any([checknull(t) for t in targets]):
new_targets = [0 if checknull(t) else t for t in targets]
new_values = [0 if checknull(v) else v for v in self._get_index_values()]
targets = np.array(new_targets, dtype=object)
values = np.array(new_values, dtype=object)
else:
values = np.array(self._get_index_values(), copy=False)

stargets = set(targets)
n = len(values)
n_t = len(targets)
Expand Down Expand Up @@ -304,6 +312,7 @@ cdef class IndexEngine:
for i in range(n):
val = values[i]
if val in stargets:
print(val, "in stargets")
if val not in d:
d[val] = []
d[val].append(i)
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,22 @@ def test_get_indexer_non_unique_dtype_mismatch():
indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)


@pytest.mark.parametrize(
    "idx, target, expected",
    [
        # Only NaN requested: both NaN positions of the index are returned.
        ([np.nan, "var1", np.nan], [np.nan], np.array([0, 2], dtype=np.intp)),
        # NaN plus a regular label: the NaN positions come first, then "var1".
        (
            [np.nan, "var1", np.nan],
            [np.nan, "var1"],
            np.array([0, 2, 1], dtype=np.intp),
        ),
    ],
)
def test_get_indexer_non_unique_multiple_nans(idx, target, expected):
    """Check ``get_indexer_for`` on an index that holds several NaN labels.

    Parameters
    ----------
    idx : list
        Labels used to build the ``pd.Index`` under test; contains repeated
        NaN entries.
    target : list
        Labels passed to ``Index.get_indexer_for``.
    expected : np.ndarray
        Positions expected back for ``target``.

    Notes
    -----
    The expected arrays use ``np.intp`` -- the same dtype the sibling
    dtype-mismatch test expects for indexers -- rather than ``np.int64``.
    ``tm.assert_numpy_array_equal`` is given arrays whose dtypes must agree,
    and ``np.int64`` only coincides with ``np.intp`` on 64-bit platforms.
    """
    # GH 35392
    axis = pd.Index(idx)
    actual = axis.get_indexer_for(target)

    tm.assert_numpy_array_equal(actual, expected)

0 comments on commit d041117

Please sign in to comment.