From 1cc030e984944bf4ca7ea5aec552dca55a117204 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Tue, 24 Nov 2020 22:59:56 +0100 Subject: [PATCH] Bug in loc raised Error when non-integer slice was given for MultiIndex (#37707) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/multi.py | 29 ++++++++--- pandas/tests/indexes/multi/test_indexing.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 9 ++++ .../tests/indexing/multiindex/test_partial.py | 48 ++++++++++++++++++- 5 files changed, 80 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 08c528fb484c8..48561b50f66ae 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -620,6 +620,7 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) +- Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 11dd3598b4864..be0b0c5208b1c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2764,9 +2764,17 @@ def _partial_tup_index(self, tup, side="left"): return start + section.searchsorted(loc, side=side) idx = self._get_loc_single_level_index(lev, lab) - if k < n - 1: + if isinstance(idx, slice) and k < n - 1: + # Get start and end value from slice, necessary when a non-integer + # interval is given as input GH#37707 + start = idx.start + end = idx.stop + elif k < n - 1: end = start + section.searchsorted(idx, side="right") start = start + section.searchsorted(idx, side="left") + elif isinstance(idx, slice): + idx = idx.start + return start + section.searchsorted(idx, side=side) else: return start + section.searchsorted(idx, side=side) @@ -3102,6 +3110,8 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): start = 0 if key.stop is not None: stop = level_index.get_loc(key.stop) + elif isinstance(start, slice): + stop = len(level_index) else: stop = len(level_index) - 1 step = key.step @@ -3136,22 +3146,27 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): else: - code = self._get_loc_single_level_index(level_index, key) + idx = self._get_loc_single_level_index(level_index, key) if level > 0 or self.lexsort_depth == 0: # Desired level is not sorted - locs = np.array(level_codes == code, dtype=bool, copy=False) + locs = np.array(level_codes == idx, dtype=bool, copy=False) if not locs.any(): # The label is present in self.levels[level] but unused: raise KeyError(key) return locs - i = level_codes.searchsorted(code, side="left") - j = level_codes.searchsorted(code, side="right") - if i == j: + if isinstance(idx, slice): + start = idx.start + end = idx.stop + else: + start = level_codes.searchsorted(idx, side="left") + end = level_codes.searchsorted(idx, side="right") + + if start == end: # The label is present in self.levels[level] but unused: raise KeyError(key) - return slice(i, j) + return slice(start, end) def get_locs(self, seq): """ diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 2b7a6ee304891..e0241c2c5eadd 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -768,7 +768,7 @@ def test_timestamp_multiindex_indexer(): [ pd.date_range( start="2019-01-02T00:15:33", - end="2019-01-05T02:15:33", + end="2019-01-05T03:15:33", freq="H", name="date", ), diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 22ffca46a4829..cd6176722245b 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -659,3 +659,12 @@ def test_getitem_non_found_tuple(): ) with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"): df.loc[(2.0, 2.0, 3.0)] + + +def test_get_loc_datetime_index(): + # GH#24263 + index = pd.date_range("2001-01-01", periods=100) + mi = MultiIndex.from_arrays([index]) + # Check if get_loc matches for Index and MultiIndex + assert mi.get_loc("2001-01") == slice(0, 31, None) + assert index.get_loc("2001-01") == slice(0, 31, None) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 538aa1d3a1164..9c356b81b85db 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,14 @@ import numpy as np import pytest -from pandas import DataFrame, Float64Index, Int64Index, MultiIndex +from pandas import ( + DataFrame, + Float64Index, + Int64Index, + MultiIndex, + date_range, + to_datetime, +) import pandas._testing as tm @@ -208,6 +215,45 @@ def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): expected.loc["bar"] = 0 tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "indexer, exp_idx, exp_values", + [ + (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]), + ( + slice(None, "2019-2"), + date_range("2019", periods=2, freq="MS"), + [0, 1, 2, 3], + ), + ], + ) + def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values): + # GH: 25165 + date_idx = date_range("2019", periods=2, freq="MS") + df = DataFrame( + list(range(4)), + index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), + ) + expected = DataFrame( + exp_values, + index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]), + ) + result = df[indexer] + tm.assert_frame_equal(result, expected) + result = df.loc[indexer] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis=0)[indexer] + tm.assert_frame_equal(result, expected) + + result = df.loc[indexer, :] + tm.assert_frame_equal(result, expected) + + df2 = df.swaplevel(0, 1).sort_index() + expected = expected.swaplevel(0, 1).sort_index() + + result = df2.loc[:, indexer, :] + tm.assert_frame_equal(result, expected) + def test_loc_getitem_partial_both_axis(): # gh-12660