Skip to content

Commit

Permalink
Bug in loc raised Error when non-integer slice was given for MultiIndex (#37707)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Nov 24, 2020
1 parent b19e47f commit 1cc030e
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 9 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,7 @@ Indexing
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)
- Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when a missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`)
- Bug in :meth:`DataFrame.loc` raising ``TypeError`` when a non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`)

Missing
^^^^^^^
Expand Down
29 changes: 22 additions & 7 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2764,9 +2764,17 @@ def _partial_tup_index(self, tup, side="left"):
return start + section.searchsorted(loc, side=side)

idx = self._get_loc_single_level_index(lev, lab)
if k < n - 1:
if isinstance(idx, slice) and k < n - 1:
# Get start and end value from slice, necessary when a non-integer
# interval is given as input GH#37707
start = idx.start
end = idx.stop
elif k < n - 1:
end = start + section.searchsorted(idx, side="right")
start = start + section.searchsorted(idx, side="left")
elif isinstance(idx, slice):
idx = idx.start
return start + section.searchsorted(idx, side=side)
else:
return start + section.searchsorted(idx, side=side)

Expand Down Expand Up @@ -3102,6 +3110,8 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
start = 0
if key.stop is not None:
stop = level_index.get_loc(key.stop)
elif isinstance(start, slice):
stop = len(level_index)
else:
stop = len(level_index) - 1
step = key.step
Expand Down Expand Up @@ -3136,22 +3146,27 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):

else:

code = self._get_loc_single_level_index(level_index, key)
idx = self._get_loc_single_level_index(level_index, key)

if level > 0 or self.lexsort_depth == 0:
# Desired level is not sorted
locs = np.array(level_codes == code, dtype=bool, copy=False)
locs = np.array(level_codes == idx, dtype=bool, copy=False)
if not locs.any():
# The label is present in self.levels[level] but unused:
raise KeyError(key)
return locs

i = level_codes.searchsorted(code, side="left")
j = level_codes.searchsorted(code, side="right")
if i == j:
if isinstance(idx, slice):
start = idx.start
end = idx.stop
else:
start = level_codes.searchsorted(idx, side="left")
end = level_codes.searchsorted(idx, side="right")

if start == end:
# The label is present in self.levels[level] but unused:
raise KeyError(key)
return slice(i, j)
return slice(start, end)

def get_locs(self, seq):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/multi/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ def test_timestamp_multiindex_indexer():
[
pd.date_range(
start="2019-01-02T00:15:33",
end="2019-01-05T02:15:33",
end="2019-01-05T03:15:33",
freq="H",
name="date",
),
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,3 +659,12 @@ def test_getitem_non_found_tuple():
)
with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
df.loc[(2.0, 2.0, 3.0)]


def test_get_loc_datetime_index():
    """Check ``get_loc`` with a partial date string on a one-level MultiIndex.

    Regression test for GH#24263: a ``MultiIndex`` built from a single
    datetime level must return the same slice as the level itself.
    """
    index = pd.date_range("2001-01-01", periods=100)
    mi = MultiIndex.from_arrays([index])
    # Both lookups are expected to resolve "2001-01" to the first 31
    # positions (presumably one entry per day of January 2001, given
    # date_range's default frequency).
    expected = slice(0, 31, None)
    # Check if get_loc matches for Index and MultiIndex
    assert mi.get_loc("2001-01") == expected
    assert index.get_loc("2001-01") == expected
48 changes: 47 additions & 1 deletion pandas/tests/indexing/multiindex/test_partial.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import numpy as np
import pytest

from pandas import DataFrame, Float64Index, Int64Index, MultiIndex
from pandas import (
DataFrame,
Float64Index,
Int64Index,
MultiIndex,
date_range,
to_datetime,
)
import pandas._testing as tm


Expand Down Expand Up @@ -208,6 +215,45 @@ def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
expected.loc["bar"] = 0
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "indexer, exp_idx, exp_values",
    [
        (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]),
        (
            slice(None, "2019-2"),
            date_range("2019", periods=2, freq="MS"),
            [0, 1, 2, 3],
        ),
    ],
)
def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
    """Slice a datetime MultiIndex level with partial date strings.

    Regression test for GH: 25165.

    Parameters
    ----------
    indexer : slice
        String-bounded slice applied to the datetime level.
    exp_idx : list-like of datetimes
        Values of level "x" expected to remain after slicing.
    exp_values : list of int
        Data values expected to remain after slicing.
    """
    # GH: 25165
    date_idx = date_range("2019", periods=2, freq="MS")
    df = DataFrame(
        list(range(4)),
        index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
    )
    expected = DataFrame(
        exp_values,
        index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
    )

    # The same slice must give the same frame through every row-access path.
    result = df[indexer]
    tm.assert_frame_equal(result, expected)
    result = df.loc[indexer]
    tm.assert_frame_equal(result, expected)

    result = df.loc(axis=0)[indexer]
    tm.assert_frame_equal(result, expected)

    result = df.loc[indexer, :]
    tm.assert_frame_equal(result, expected)

    # Move the datetime level to the second position and slice it there.
    df2 = df.swaplevel(0, 1).sort_index()
    expected = expected.swaplevel(0, 1).sort_index()

    result = df2.loc[:, indexer, :]
    tm.assert_frame_equal(result, expected)


def test_loc_getitem_partial_both_axis():
# gh-12660
Expand Down

0 comments on commit 1cc030e

Please sign in to comment.