DEPR: loc with listlikes with missing elements (pandas-dev#29802)

proost · Dec 19, 2019
commit 988165b · 1 parent 187d978
Showing 11 changed files with 116 additions and 241 deletions.
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1176,18 +1176,12 @@ def _validate_read_indexer(
             # non-missing values), but a bit later in the
             # code, so we want to avoid warning & then
             # just raising
-
-            _missing_key_warning = textwrap.dedent(
-                """
-            Passing list-likes to .loc or [] with any missing label will raise
-            KeyError in the future, you can use .reindex() as an alternative.
-
-            See the documentation here:
-            https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"""  # noqa: E501
-            )
-
             if not (ax.is_categorical() or ax.is_interval()):
-                warnings.warn(_missing_key_warning, FutureWarning, stacklevel=6)
+                raise KeyError(
+                    "Passing list-likes to .loc or [] with any missing labels "
+                    "is no longer supported, see "
+                    "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"  # noqa:E501
+                )
 
     def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False):
         """

diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -393,16 +393,12 @@ def __init__(
             if not len(Index(cols) & df.columns):
                 raise KeyError("passes columns are not ALL present dataframe")
 
-            # deprecatedin gh-17295
-            # 1 missing is ok (for now)
             if len(Index(cols) & df.columns) != len(cols):
-                warnings.warn(
-                    "Not all names specified in 'columns' are found; "
-                    "this will raise a KeyError in the future",
-                    FutureWarning,
-                )
+                # Deprecated in GH#17295, enforced in 1.0.0
+                raise KeyError("Not all names specified in 'columns' are found")
+
+            self.df = df
 
-            self.df = df.reindex(columns=cols)
         self.columns = self.df.columns
         self.float_format = float_format
         self.index = index

diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py
@@ -2,6 +2,7 @@
 
 from dateutil import tz
 import numpy as np
+import pytest
 
 import pandas as pd
 from pandas import DataFrame, Index, Series, Timestamp, date_range
@@ -242,11 +243,8 @@ def test_series_partial_set_datetime(self):
             Timestamp("2011-01-02"),
             Timestamp("2011-01-03"),
         ]
-        exp = Series(
-            [np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s"
-        )
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            ser.loc[keys]
 
     def test_series_partial_set_period(self):
         # GH 11497
@@ -273,12 +271,8 @@ def test_series_partial_set_period(self):
             pd.Period("2011-01-02", freq="D"),
             pd.Period("2011-01-03", freq="D"),
         ]
-        exp = Series(
-            [np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s"
-        )
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = ser.loc[keys]
-        tm.assert_series_equal(result, exp)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            ser.loc[keys]
 
     def test_nanosecond_getitem_setitem_with_tz(self):
         # GH 11679

diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
@@ -726,25 +726,15 @@ def test_floating_misc(self):
         tm.assert_series_equal(result1, result3)
         tm.assert_series_equal(result1, result4)
 
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result1 = s[[1.6, 5, 10]]
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result2 = s.loc[[1.6, 5, 10]]
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result3 = s.loc[[1.6, 5, 10]]
-        tm.assert_series_equal(result1, result2)
-        tm.assert_series_equal(result1, result3)
-        tm.assert_series_equal(result1, Series([np.nan, 2, 4], index=[1.6, 5, 10]))
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result1 = s[[0, 1, 2]]
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result2 = s.loc[[0, 1, 2]]
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result3 = s.loc[[0, 1, 2]]
-        tm.assert_series_equal(result1, result2)
-        tm.assert_series_equal(result1, result3)
-        tm.assert_series_equal(result1, Series([0.0, np.nan, np.nan], index=[0, 1, 2]))
+        with pytest.raises(KeyError, match="with any missing labels"):
+            s[[1.6, 5, 10]]
+        with pytest.raises(KeyError, match="with any missing labels"):
+            s.loc[[1.6, 5, 10]]
+
+        with pytest.raises(KeyError, match="with any missing labels"):
+            s[[0, 1, 2]]
+        with pytest.raises(KeyError, match="with any missing labels"):
+            s.loc[[0, 1, 2]]
 
         result1 = s.loc[[2.5, 5]]
         result2 = s.loc[[2.5, 5]]

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -728,20 +728,8 @@ def test_iloc_non_unique_indexing(self):
         df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
         df2 = concat([df2, 2 * df2, 3 * df2])
 
-        sidx = df2.index.to_series()
-        expected = df2.iloc[idx[idx <= sidx.max()]]
-
-        new_list = []
-        for r, s in expected.iterrows():
-            new_list.append(s)
-            new_list.append(s * 2)
-            new_list.append(s * 3)
-
-        expected = DataFrame(new_list)
-        expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])], sort=True)
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = df2.loc[idx]
-        tm.assert_frame_equal(result, expected, check_index_type=False)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            df2.loc[idx]
 
     def test_iloc_empty_list_indexer_is_ok(self):
 

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -299,32 +299,13 @@ def test_dups_fancy_indexing(self):
         tm.assert_frame_equal(result, expected)
 
         rows = ["C", "B", "E"]
-        expected = DataFrame(
-            {
-                "test": [11, 9, np.nan],
-                "test1": [7.0, 6, np.nan],
-                "other": ["d", "c", np.nan],
-            },
-            index=rows,
-        )
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = df.loc[rows]
-        tm.assert_frame_equal(result, expected)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            df.loc[rows]
 
         # see GH5553, make sure we use the right indexer
         rows = ["F", "G", "H", "C", "B", "E"]
-        expected = DataFrame(
-            {
-                "test": [np.nan, np.nan, np.nan, 11, 9, np.nan],
-                "test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan],
-                "other": [np.nan, np.nan, np.nan, "d", "c", np.nan],
-            },
-            index=rows,
-        )
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = df.loc[rows]
-        tm.assert_frame_equal(result, expected)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            df.loc[rows]
 
         # List containing only missing label
         dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
@@ -340,38 +321,25 @@ def test_dups_fancy_indexing(self):
 
         # GH 4619; duplicate indexer with missing label
         df = DataFrame({"A": [0, 1, 2]})
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = df.loc[[0, 8, 0]]
-        expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
-        tm.assert_frame_equal(result, expected, check_index_type=False)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            df.loc[[0, 8, 0]]
 
         df = DataFrame({"A": list("abc")})
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = df.loc[[0, 8, 0]]
-        expected = DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0])
-        tm.assert_frame_equal(result, expected, check_index_type=False)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            df.loc[[0, 8, 0]]
 
         # non unique with non unique selector
         df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
-        expected = DataFrame(
-            {"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"]
-        )
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = df.loc[["A", "A", "E"]]
-        tm.assert_frame_equal(result, expected)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            df.loc[["A", "A", "E"]]
 
     def test_dups_fancy_indexing2(self):
         # GH 5835
         # dups on index and missing values
         df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"])
 
-        expected = pd.concat(
-            [df.loc[:, ["A", "B"]], DataFrame(np.nan, columns=["C"], index=df.index)],
-            axis=1,
-        )
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = df.loc[:, ["A", "B", "C"]]
-        tm.assert_frame_equal(result, expected)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            df.loc[:, ["A", "B", "C"]]
 
         # GH 6504, multi-axis indexing
         df = DataFrame(

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -159,48 +159,46 @@ def test_loc_getitem_label_list_with_missing(self):
         self.check_result(
             "loc", [0, 1, 2], "indexer", [0, 1, 2], typs=["empty"], fails=KeyError,
         )
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.check_result(
-                "loc",
-                [0, 2, 10],
-                "ix",
-                [0, 2, 10],
-                typs=["ints", "uints", "floats"],
-                axes=0,
-                fails=KeyError,
-            )
+        self.check_result(
+            "loc",
+            [0, 2, 10],
+            "ix",
+            [0, 2, 10],
+            typs=["ints", "uints", "floats"],
+            axes=0,
+            fails=KeyError,
+        )
 
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.check_result(
-                "loc",
-                [3, 6, 7],
-                "ix",
-                [3, 6, 7],
-                typs=["ints", "uints", "floats"],
-                axes=1,
-                fails=KeyError,
-            )
+        self.check_result(
+            "loc",
+            [3, 6, 7],
+            "ix",
+            [3, 6, 7],
+            typs=["ints", "uints", "floats"],
+            axes=1,
+            fails=KeyError,
+        )
 
         # GH 17758 - MultiIndex and missing keys
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.check_result(
-                "loc",
-                [(1, 3), (1, 4), (2, 5)],
-                "ix",
-                [(1, 3), (1, 4), (2, 5)],
-                typs=["multi"],
-                axes=0,
-            )
+        self.check_result(
+            "loc",
+            [(1, 3), (1, 4), (2, 5)],
+            "ix",
+            [(1, 3), (1, 4), (2, 5)],
+            typs=["multi"],
+            axes=0,
+            fails=KeyError,
+        )
 
     def test_getitem_label_list_with_missing(self):
         s = Series(range(3), index=["a", "b", "c"])
 
         # consistency
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        with pytest.raises(KeyError, match="with any missing labels"):
             s[["a", "d"]]
 
         s = Series(range(3))
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        with pytest.raises(KeyError, match="with any missing labels"):
             s[[0, 3]]
 
     def test_loc_getitem_label_list_fails(self):
@@ -305,10 +303,8 @@ def test_loc_to_fail(self):
             s.loc[["4"]]
 
         s.loc[-1] = 3
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = s.loc[[-1, -2]]
-        expected = Series([3, np.nan], index=[-1, -2])
-        tm.assert_series_equal(result, expected)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            s.loc[[-1, -2]]
 
         s["a"] = 2
         msg = (
@@ -354,10 +350,8 @@ def test_loc_getitem_list_with_fail(self):
             s.loc[[3]]
 
         # a non-match and a match
-        with tm.assert_produces_warning(FutureWarning):
-            expected = s.loc[[2, 3]]
-        result = s.reindex([2, 3])
-        tm.assert_series_equal(result, expected)
+        with pytest.raises(KeyError, match="with any missing labels"):
+            s.loc[[2, 3]]
 
     def test_loc_getitem_label_slice(self):
 
@@ -1034,10 +1028,8 @@ def test_series_loc_getitem_label_list_missing_values():
         ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64"
     )
     s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
-    expected = Series([11.0, 5.0, 11.0, np.nan], index=key)
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        result = s.loc[key]
-    tm.assert_series_equal(result, expected)
+    with pytest.raises(KeyError, match="with any missing labels"):
+        s.loc[key]
 
 
 @pytest.mark.parametrize(