Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: loc with listlikes with missing elements #29802

Merged
merged 7 commits into from
Nov 29, 2019
16 changes: 5 additions & 11 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,18 +1174,12 @@ def _validate_read_indexer(
# non-missing values), but a bit later in the
# code, so we want to avoid warning & then
# just raising

_missing_key_warning = textwrap.dedent(
"""
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike""" # noqa: E501
)

if not (ax.is_categorical() or ax.is_interval()):
warnings.warn(_missing_key_warning, FutureWarning, stacklevel=6)
raise KeyError(
"Passing list-likes to .loc or [] with any missing labels "
"is no longer supported, see "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501
)

def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False):
"""
Expand Down
11 changes: 3 additions & 8 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,16 +393,11 @@ def __init__(
if not len(Index(cols) & df.columns):
raise KeyError("passes columns are not ALL present dataframe")

# deprecated in gh-17295
# 1 missing is ok (for now)
if len(Index(cols) & df.columns) != len(cols):
warnings.warn(
"Not all names specified in 'columns' are found; "
"this will raise a KeyError in the future",
FutureWarning,
)
# Deprecated in GH#17295, enforced in 1.0.0
raise KeyError("Not all names specified in 'columns' are found")

self.df = df.reindex(columns=cols)
self.df = df.reindex(columns=cols) # TODO: Should this change?
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
self.columns = self.df.columns
self.float_format = float_format
self.index = index
Expand Down
16 changes: 5 additions & 11 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dateutil import tz
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range
Expand Down Expand Up @@ -242,11 +243,8 @@ def test_series_partial_set_datetime(self):
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
with pytest.raises(KeyError, match="with any missing labels"):
ser.loc[keys]

def test_series_partial_set_period(self):
# GH 11497
Expand All @@ -273,12 +271,8 @@ def test_series_partial_set_period(self):
pd.Period("2011-01-02", freq="D"),
pd.Period("2011-01-03", freq="D"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[keys]
tm.assert_series_equal(result, exp)
with pytest.raises(KeyError, match="with any missing labels"):
ser.loc[keys]

def test_nanosecond_getitem_setitem_with_tz(self):
# GH 11679
Expand Down
28 changes: 9 additions & 19 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,25 +726,15 @@ def test_floating_misc(self):
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[1.6, 5, 10]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, Series([np.nan, 2, 4], index=[1.6, 5, 10]))

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[0, 1, 2]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, Series([0.0, np.nan, np.nan], index=[0, 1, 2]))
with pytest.raises(KeyError, match="with any missing labels"):
s[[1.6, 5, 10]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[1.6, 5, 10]]

with pytest.raises(KeyError, match="with any missing labels"):
s[[0, 1, 2]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[0, 1, 2]]

result1 = s.loc[[2.5, 5]]
result2 = s.loc[[2.5, 5]]
Expand Down
17 changes: 3 additions & 14 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,20 +750,9 @@ def test_iloc_non_unique_indexing(self):
df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
df2 = concat([df2, 2 * df2, 3 * df2])

sidx = df2.index.to_series()
expected = df2.iloc[idx[idx <= sidx.max()]]

new_list = []
for r, s in expected.iterrows():
new_list.append(s)
new_list.append(s * 2)
new_list.append(s * 3)

expected = DataFrame(new_list)
expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])], sort=True)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df2.loc[idx]
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
# TODO: should non-uniqueness play a part in the error message?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, yes I would agree here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, I'll see what I can do here.

BTW in #6581 are we removing things in the 0.25.0 section or only the earlier ones?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, the non-uniqueness doesn't seem to play any part in us getting here, so I'm now leaning towards not having a special message.

df2.loc[idx]

def test_iloc_empty_list_indexer_is_ok(self):

Expand Down
56 changes: 12 additions & 44 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,32 +299,13 @@ def test_dups_fancy_indexing(self):
tm.assert_frame_equal(result, expected)

rows = ["C", "B", "E"]
expected = DataFrame(
{
"test": [11, 9, np.nan],
"test1": [7.0, 6, np.nan],
"other": ["d", "c", np.nan],
},
index=rows,
)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[rows]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[rows]

# see GH5553, make sure we use the right indexer
rows = ["F", "G", "H", "C", "B", "E"]
expected = DataFrame(
{
"test": [np.nan, np.nan, np.nan, 11, 9, np.nan],
"test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan],
"other": [np.nan, np.nan, np.nan, "d", "c", np.nan],
},
index=rows,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[rows]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[rows]

# List containing only missing label
dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
Expand All @@ -340,38 +321,25 @@ def test_dups_fancy_indexing(self):

# GH 4619; duplicate indexer with missing label
df = DataFrame({"A": [0, 1, 2]})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[[0, 8, 0]]
expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[[0, 8, 0]]

df = DataFrame({"A": list("abc")})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[[0, 8, 0]]
expected = DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0])
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[[0, 8, 0]]

# non unique with non unique selector
df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
expected = DataFrame(
{"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"]
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[["A", "A", "E"]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[["A", "A", "E"]]

def test_dups_fancy_indexing2(self):
# GH 5835
# dups on index and missing values
df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"])

expected = pd.concat(
[df.loc[:, ["A", "B"]], DataFrame(np.nan, columns=["C"], index=df.index)],
axis=1,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[:, ["A", "B", "C"]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[:, ["A", "B", "C"]]

# GH 6504, multi-axis indexing
df = DataFrame(
Expand Down
84 changes: 38 additions & 46 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,51 +221,49 @@ def test_loc_getitem_label_list_with_missing(self):
typs=["empty"],
fails=KeyError,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"list lbl",
"loc",
[0, 2, 10],
"ix",
[0, 2, 10],
typs=["ints", "uints", "floats"],
axes=0,
fails=KeyError,
)
self.check_result(
"list lbl",
"loc",
[0, 2, 10],
"ix",
[0, 2, 10],
typs=["ints", "uints", "floats"],
axes=0,
fails=KeyError,
)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"list lbl",
"loc",
[3, 6, 7],
"ix",
[3, 6, 7],
typs=["ints", "uints", "floats"],
axes=1,
fails=KeyError,
)
self.check_result(
"list lbl",
"loc",
[3, 6, 7],
"ix",
[3, 6, 7],
typs=["ints", "uints", "floats"],
axes=1,
fails=KeyError,
)

# GH 17758 - MultiIndex and missing keys
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"list lbl",
"loc",
[(1, 3), (1, 4), (2, 5)],
"ix",
[(1, 3), (1, 4), (2, 5)],
typs=["multi"],
axes=0,
)
self.check_result(
"list lbl",
"loc",
[(1, 3), (1, 4), (2, 5)],
"ix",
[(1, 3), (1, 4), (2, 5)],
typs=["multi"],
axes=0,
fails=KeyError,
)

def test_getitem_label_list_with_missing(self):
s = Series(range(3), index=["a", "b", "c"])

# consistency
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with pytest.raises(KeyError, match="with any missing labels"):
s[["a", "d"]]

s = Series(range(3))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with pytest.raises(KeyError, match="with any missing labels"):
s[[0, 3]]

def test_loc_getitem_label_list_fails(self):
Expand Down Expand Up @@ -386,10 +384,8 @@ def test_loc_to_fail(self):
s.loc[["4"]]

s.loc[-1] = 3
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[-1, -2]]
expected = Series([3, np.nan], index=[-1, -2])
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[-1, -2]]

s["a"] = 2
msg = (
Expand Down Expand Up @@ -435,10 +431,8 @@ def test_loc_getitem_list_with_fail(self):
s.loc[[3]]

# a non-match and a match
with tm.assert_produces_warning(FutureWarning):
expected = s.loc[[2, 3]]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[2, 3]]

def test_loc_getitem_label_slice(self):

Expand Down Expand Up @@ -1134,10 +1128,8 @@ def test_series_loc_getitem_label_list_missing_values():
["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64"
)
s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
expected = Series([11.0, 5.0, 11.0, np.nan], index=key)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[key]
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[key]


@pytest.mark.parametrize(
Expand Down
Loading