From af81898b476b0ed317d232bdf9655bfd461bbc35 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 May 2022 16:51:16 -0700 Subject: [PATCH] DEPR: df.iloc[:, foo] = bar attempt to set inplace (#45333) --- doc/source/user_guide/10min.rst | 1 + doc/source/whatsnew/v1.5.0.rst | 67 ++++++++++++++++++ pandas/core/frame.py | 25 ++++++- pandas/core/groupby/groupby.py | 8 ++- pandas/core/indexing.py | 48 +++++++++++-- pandas/tests/extension/base/setitem.py | 27 +++++++- pandas/tests/frame/indexing/test_coercion.py | 10 ++- pandas/tests/frame/indexing/test_indexing.py | 7 +- pandas/tests/frame/indexing/test_setitem.py | 10 ++- pandas/tests/frame/indexing/test_where.py | 15 ++-- pandas/tests/frame/methods/test_diff.py | 5 +- pandas/tests/frame/methods/test_dropna.py | 4 +- pandas/tests/frame/methods/test_quantile.py | 2 +- pandas/tests/frame/methods/test_rename.py | 5 +- pandas/tests/frame/methods/test_shift.py | 10 ++- pandas/tests/frame/methods/test_sort_index.py | 3 + pandas/tests/frame/test_nonunique_indexes.py | 11 ++- pandas/tests/frame/test_reductions.py | 8 ++- pandas/tests/frame/test_stack_unstack.py | 8 ++- pandas/tests/indexing/multiindex/test_loc.py | 7 +- pandas/tests/indexing/test_iloc.py | 19 +++-- pandas/tests/indexing/test_indexing.py | 20 ++++-- pandas/tests/indexing/test_loc.py | 69 ++++++++++++++----- pandas/tests/indexing/test_partial.py | 4 +- pandas/tests/io/pytables/test_append.py | 8 +-- pandas/tests/io/sas/test_sas7bdat.py | 2 +- pandas/tests/io/test_stata.py | 4 +- 27 files changed, 334 insertions(+), 73 deletions(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 9916f13e015dd..0adb937de2b8b 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -328,6 +328,7 @@ Setting values by position: Setting by assigning with a NumPy array: .. 
ipython:: python + :okwarning: df.loc[:, "D"] = np.array([5] * len(df)) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b23dd5c2f05a6..e9e13c3ed5bbe 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -533,6 +533,73 @@ As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and raise a ``FutureWarning``. This can be silenced and the previous behavior retained by specifying ``group_keys=False``. +.. _whatsnew_150.notable_bug_fixes.setitem_column_try_inplace: + _ see also _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace + +Try operating inplace when setting values with ``loc`` and ``iloc`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Most of the time setting values with ``frame.iloc`` attempts to set values +in-place, only falling back to inserting a new array if necessary. In the past, +setting entire columns has been an exception to this rule: + +.. ipython:: python + + values = np.arange(4).reshape(2, 2) + df = pd.DataFrame(values) + ser = df[0] + +*Old behavior*: + +.. code-block:: ipython + + In [3]: df.iloc[:, 0] = np.array([10, 11]) + In [4]: ser + Out[4]: + 0 0 + 1 2 + Name: 0, dtype: int64 + +This behavior is deprecated. In a future version, setting an entire column with +iloc will attempt to operate inplace. + +*Future behavior*: + +.. code-block:: ipython + + In [3]: df.iloc[:, 0] = np.array([10, 11]) + In [4]: ser + Out[4]: + 0 10 + 1 11 + Name: 0, dtype: int64 + +To get the old behavior, use :meth:`DataFrame.__setitem__` directly: + +*Future behavior*: + +.. code-block:: ipython + + In [5]: df[0] = np.array([21, 31]) + In [4]: ser + Out[4]: + 0 10 + 1 11 + Name: 0, dtype: int64 + +In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`: + +*Future behavior*: + +.. 
code-block:: ipython + + In [5]: df.columns = ["A", "A"] + In [5]: df.isetitem(0, np.array([21, 31])) + In [4]: ser + Out[4]: + 0 10 + 1 11 + Name: 0, dtype: int64 + .. _whatsnew_150.deprecations.numeric_only_default: ``numeric_only`` default value diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7e123fc2a8bd2..d563aa8b06ca5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3687,6 +3687,29 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: loc = engine.get_loc(index) return series._values[loc] + def isetitem(self, loc, value) -> None: + """ + Set the given value in the column with position 'loc'. + + This is a positional analogue to __setitem__. + + Parameters + ---------- + loc : int or sequence of ints + value : scalar or arraylike + + Notes + ----- + Unlike `frame.iloc[:, loc] = value`, `frame.isetitem(loc, value)` will + _never_ try to set the values in place, but will always insert a new + array. + + In cases where `frame.columns` is unique, this is equivalent to + `frame[frame.columns[loc]] = value`. 
+ """ + arraylike = self._sanitize_column(value) + self._iset_item_mgr(loc, arraylike, inplace=False) + def __setitem__(self, key, value): key = com.apply_if_callable(key, self) @@ -5467,7 +5490,7 @@ def _replace_columnwise( target, value = mapping[ax[i]] newobj = ser.replace(target, value, regex=regex) - res.iloc[:, i] = newobj + res._iset_item(i, newobj) if inplace: return diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 967dc3158d4ba..6cee6c1913f74 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2257,7 +2257,13 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default counts = self.count() result_ilocs = result.columns.get_indexer_for(cols) count_ilocs = counts.columns.get_indexer_for(cols) - result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) + with warnings.catch_warnings(): + # TODO(2.0): once iloc[:, foo] = bar deprecation is enforced, + # this catching will be unnecessary + warnings.filterwarnings( + "ignore", ".*will attempt to set the values inplace.*" + ) + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result @final diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 02c095202d079..3418f1cab0e6f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1937,6 +1937,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): pi = plane_indexer ser = self.obj._ixs(loc, axis=1) + orig_values = ser._values # perform the equivalent of a setitem on the info axis # as we have a null slice or a slice with full bounds @@ -1944,17 +1945,17 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): # multi-dim object # GH#6149 (null slice), GH#10408 (full bounds) if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)): - ser = value + pass elif ( is_array_like(value) and is_exact_shape_match(ser, value) and not is_empty_indexer(pi) ): if is_list_like(pi): - ser = 
value[np.argsort(pi)] + value = value[np.argsort(pi)] else: # in case of slice - ser = value[pi] + value = value[pi] else: # set the item, first attempting to operate inplace, then # falling back to casting if necessary; see @@ -1970,8 +1971,40 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): self.obj._iset_item(loc, ser) return - # reset the sliced object if unique - self.obj._iset_item(loc, ser) + # We will not operate in-place, but will attempt to in the future. + # To determine whether we need to issue a FutureWarning, see if the + # setting in-place would work, i.e. behavior will change. + warn = can_hold_element(ser._values, value) + # Don't issue the warning yet, as we can still trim a few cases where + # behavior will not change. + + self.obj._iset_item(loc, value) + + if warn: + new_values = self.obj._ixs(loc, axis=1)._values + + if ( + isinstance(new_values, np.ndarray) + and isinstance(orig_values, np.ndarray) + and np.shares_memory(new_values, orig_values) + ): + # TODO: get something like tm.shares_memory working? + # The values were set inplace after all, no need to warn, + # e.g. test_rename_nocopy + pass + else: + warnings.warn( + "In a future version, `df.iloc[:, i] = newvals` will attempt " + "to set the values inplace instead of always setting a new " + "array. To retain the old behavior, use either " + "`df[df.columns[i]] = newvals` or, if columns are non-unique, " + "`df.isetitem(i, newvals)`", + FutureWarning, + stacklevel=find_stack_level(), + ) + # TODO: how to get future behavior? + # TODO: what if we got here indirectly via loc? 
+ return def _setitem_single_block(self, indexer, value, name: str): """ @@ -1981,7 +2014,6 @@ def _setitem_single_block(self, indexer, value, name: str): info_axis = self.obj._info_axis_number item_labels = self.obj._get_axis(info_axis) - if isinstance(indexer, tuple): # if we are setting on the info axis ONLY @@ -1996,7 +2028,9 @@ def _setitem_single_block(self, indexer, value, name: str): if len(item_labels.get_indexer_for([col])) == 1: # e.g. test_loc_setitem_empty_append_expands_rows loc = item_labels.get_loc(col) - self.obj._iset_item(loc, value) + # Go through _setitem_single_column to get + # FutureWarning if relevant. + self._setitem_single_column(loc, value, indexer[0]) return indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index c2db54d832195..283b67bb9171d 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -1,6 +1,13 @@ import numpy as np import pytest +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) + import pandas as pd import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -361,6 +368,11 @@ def test_setitem_frame_2d_values(self, data): # GH#44514 df = pd.DataFrame({"A": data}) + # These dtypes have non-broken implementations of _can_hold_element + has_can_hold_element = isinstance( + data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype) + ) + # Avoiding using_array_manager fixture # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) @@ -369,13 +381,24 @@ def test_setitem_frame_2d_values(self, data): orig = df.copy() - df.iloc[:] = df + msg = "will attempt to set the values inplace instead" + warn = None + if has_can_hold_element and not isinstance(data.dtype, PandasDtype): + # 
PandasDtype excluded because it isn't *really* supported. + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:] = df self.assert_frame_equal(df, orig) df.iloc[:-1] = df.iloc[:-1] self.assert_frame_equal(df, orig) - df.iloc[:] = df.values + if isinstance(data.dtype, DatetimeTZDtype): + # no warning bc df.values casts to object dtype + warn = None + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:] = df.values self.assert_frame_equal(df, orig) if not using_array_manager: # GH#33457 Check that this setting occurred in-place diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index 8b2bc60953e3e..cf4af32fc887a 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -36,7 +36,10 @@ def test_loc_setitem_multiindex_columns(self, consolidate): A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32) assert (A.dtypes == np.float32).all() - A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32) + assert (A.dtypes == np.float32).all() A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32) @@ -129,7 +132,10 @@ def test_iloc_setitem_unnecesssary_float_upcasting(): orig = df.copy() values = df[0].values.reshape(2, 1) - df.iloc[:, 0:1] = values + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0:1] = values tm.assert_frame_equal(df, orig) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 46bb7b3bfddab..5cac63e41da67 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -688,7 +688,7 @@ def 
test_getitem_setitem_boolean_multi(self): expected.loc[[0, 2], [1]] = 5 tm.assert_frame_equal(df, expected) - def test_getitem_setitem_float_labels(self): + def test_getitem_setitem_float_labels(self, using_array_manager): index = Index([1.5, 2, 3, 4, 5]) df = DataFrame(np.random.randn(5, 5), index=index) @@ -771,7 +771,10 @@ def test_getitem_setitem_float_labels(self): assert len(result) == 5 cp = df.copy() - cp.loc[1.0:5.0] = 0 + warn = FutureWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + cp.loc[1.0:5.0] = 0 result = cp.loc[1.0:5.0] assert (result == 0).values.all() diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index f1e7b18a73173..fda37fdedb92a 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -399,10 +399,14 @@ def test_setitem_frame_length_0_str_key(self, indexer): def test_setitem_frame_duplicate_columns(self, using_array_manager): # GH#15695 + warn = FutureWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + cols = ["A", "B", "C"] * 2 df = DataFrame(index=range(3), columns=cols) df.loc[0, "A"] = (0, 3) - df.loc[:, "B"] = (1, 4) + with tm.assert_produces_warning(warn, match=msg): + df.loc[:, "B"] = (1, 4) df["C"] = (2, 5) expected = DataFrame( [ @@ -769,7 +773,9 @@ def test_setitem_string_column_numpy_dtype_raising(self): def test_setitem_empty_df_duplicate_columns(self): # GH#38521 df = DataFrame(columns=["a", "b", "b"], dtype="float64") - df.loc[:, "a"] = list(range(2)) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, "a"] = list(range(2)) expected = DataFrame( [[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"] ) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py 
index 62d1f89814206..d65a5ebe56ee2 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -365,7 +365,7 @@ def test_where_bug_transposition(self): result = a.where(do_not_replace, b) tm.assert_frame_equal(result, expected) - def test_where_datetime(self): + def test_where_datetime(self, using_array_manager): # GH 3311 df = DataFrame( @@ -385,7 +385,11 @@ def test_where_datetime(self): expected = df.copy() expected.loc[[0, 1], "A"] = np.nan - expected.loc[:, "C"] = np.nan + + warn = FutureWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + expected.loc[:, "C"] = np.nan tm.assert_frame_equal(result, expected) def test_where_none(self): @@ -513,7 +517,7 @@ def test_where_axis_with_upcast(self): assert return_value is None tm.assert_frame_equal(result, expected) - def test_where_axis_multiple_dtypes(self): + def test_where_axis_multiple_dtypes(self, using_array_manager): # Multiple dtypes (=> multiple Blocks) df = pd.concat( [ @@ -569,7 +573,10 @@ def test_where_axis_multiple_dtypes(self): d2 = df.copy().drop(1, axis=1) expected = df.copy() - expected.loc[:, 1] = np.nan + warn = FutureWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + expected.loc[:, 1] = np.nan result = df.where(mask, d2) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index f61529659e9d5..fc804836f9a9b 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -90,7 +90,10 @@ def test_diff_datetime_with_nat_zero_periods(self, tz): df = ser.to_frame() df[1] = ser.copy() - df.iloc[:, 0] = pd.NaT + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0] = pd.NaT 
expected = df - df assert expected[0].isna().all() diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 43cecc6a1aed5..62351aa89c914 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -220,7 +220,9 @@ def test_dropna_with_duplicate_columns(self): df.iloc[2, [0, 1, 2]] = np.nan df.iloc[0, 0] = np.nan df.iloc[1, 1] = np.nan - df.iloc[:, 3] = np.nan + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 3] = np.nan expected = df.dropna(subset=["A", "B", "C"], how="all") expected.columns = ["A", "A", "B", "C"] diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 655624a4b59ff..798212f957e3c 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -704,7 +704,7 @@ def test_quantile_ea_all_na(self, obj, index): obj.iloc[:] = index._na_value # TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed - obj[:] = obj.astype(index.dtype) + obj = obj.astype(index.dtype) assert np.all(obj.dtypes == index.dtype) # result should be invariant to shuffling diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 33fb191027c27..b1594660caec6 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -176,7 +176,10 @@ def test_rename_nocopy(self, float_frame): assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) - renamed.loc[:, "foo"] = 1.0 + with tm.assert_produces_warning(None): + # This loc setitem already happens inplace, so no warning + # that this will change in the future + renamed.loc[:, "foo"] = 1.0 assert (float_frame["C"] == 1.0).all() def test_rename_inplace(self, float_frame): diff --git a/pandas/tests/frame/methods/test_shift.py 
b/pandas/tests/frame/methods/test_shift.py index 1ab63454f11bd..8f60fe957a39b 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -354,17 +354,23 @@ def test_shift_empty(self): tm.assert_frame_equal(df, rs) - def test_shift_duplicate_columns(self): + def test_shift_duplicate_columns(self, using_array_manager): # GH#9092; verify that position-based shifting works # in the presence of duplicate columns column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] data = np.random.randn(20, 5) + warn = None + if using_array_manager: + warn = FutureWarning + shifted = [] for columns in column_lists: df = DataFrame(data.copy(), columns=columns) for s in range(5): - df.iloc[:, s] = df.iloc[:, s].shift(s + 1) + msg = "will attempt to set the values inplace" + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:, s] = df.iloc[:, s].shift(s + 1) df.columns = range(5) shifted.append(df) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index bc5c68e4ec18d..5d1cc3d4ecee5 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -44,6 +44,9 @@ def test_sort_index_and_reconstruction_doc_example(self): assert result.index.is_monotonic_increasing tm.assert_frame_equal(result, expected) + # FIXME: the FutureWarning is issued on a setitem-with-expansion + # which will *not* change behavior, so should not get a warning. 
+ @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") def test_sort_index_non_existent_label_multiindex(self): # GH#12261 df = DataFrame(0, columns=[], index=MultiIndex.from_product([[], []])) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 865994519549b..cd6397b053803 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -318,18 +318,23 @@ def test_dup_columns_across_dtype(self): xp.columns = ["A", "A", "B"] tm.assert_frame_equal(rs, xp) - def test_set_value_by_index(self): + def test_set_value_by_index(self, using_array_manager): # See gh-12344 + warn = FutureWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + df = DataFrame(np.arange(9).reshape(3, 3).T) df.columns = list("AAA") expected = df.iloc[:, 2] - df.iloc[:, 0] = 3 + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 2], expected) df = DataFrame(np.arange(9).reshape(3, 3).T) df.columns = [2, float(2), str(2)] expected = df.iloc[:, 1] - df.iloc[:, 0] = 3 + with tm.assert_produces_warning(warn, match=msg): + df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 1], expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7f2a13862f4ed..5226510f03195 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1722,13 +1722,17 @@ def test_mad_nullable_integer_all_na(any_signed_int_ea_dtype): df2 = df.astype(any_signed_int_ea_dtype) # case with all-NA row/column - df2.iloc[:, 1] = pd.NA # FIXME(GH#44199): this doesn't operate in-place - df2.iloc[:, 1] = pd.array([pd.NA] * len(df2), dtype=any_signed_int_ea_dtype) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df2.iloc[:, 1] = pd.NA # FIXME(GH#44199): this doesn't operate 
in-place + df2.iloc[:, 1] = pd.array([pd.NA] * len(df2), dtype=any_signed_int_ea_dtype) + with tm.assert_produces_warning( FutureWarning, match="The 'mad' method is deprecated" ): result = df2.mad() expected = df.mad() + expected[1] = pd.NA expected = expected.astype("Float64") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index fedcc0e2a2284..69e5d5e3d5447 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -22,9 +22,13 @@ class TestDataFrameReshape: - def test_stack_unstack(self, float_frame): + def test_stack_unstack(self, float_frame, using_array_manager): + warn = FutureWarning if using_array_manager else None + msg = "will attempt to set the values inplace" + df = float_frame.copy() - df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) + with tm.assert_produces_warning(warn, match=msg): + df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) stacked = df.stack() stacked_df = DataFrame({"foo": stacked, "bar": stacked}) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index bdb59db1a1b38..19dfe20a3a68d 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -538,8 +538,11 @@ def test_loc_setitem_single_column_slice(): columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]), ) expected = df.copy() - df.loc[:, "B"] = np.arange(4) - expected.iloc[:, 2] = np.arange(4) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, "B"] = np.arange(4) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected.iloc[:, 2] = np.arange(4) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 19ea6753c616c..c61f3c028f129 100644 --- 
a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -78,9 +78,14 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage df = frame.copy() orig_vals = df.values - indexer(df)[key, 0] = cat overwrite = isinstance(key, slice) and key == slice(None) + warn = None + if overwrite: + warn = FutureWarning + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(warn, match=msg): + indexer(df)[key, 0] = cat if overwrite: # TODO: GH#39986 this probably shouldn't behave differently @@ -101,7 +106,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) df = frame.copy() orig_vals = df.values - indexer(df)[key, 0] = cat + with tm.assert_produces_warning(FutureWarning, match=msg): + indexer(df)[key, 0] = cat expected = DataFrame({0: cat, 1: range(3)}) tm.assert_frame_equal(df, expected) @@ -884,7 +890,9 @@ def test_iloc_setitem_categorical_updates_inplace(self): assert tm.shares_memory(df[1], cat) # This should modify our original values in-place - df.iloc[:, 0] = cat[::-1] + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0] = cat[::-1] assert tm.shares_memory(df[1], cat) expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"]) @@ -1271,7 +1279,10 @@ def test_iloc_setitem_dtypes_duplicate_columns( ): # GH#22035 df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"]) - df.iloc[:, 0] = df.iloc[:, 0].astype(dtypes) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0] = df.iloc[:, 0].astype(dtypes) + expected_df = DataFrame( [[expected_value, "str", "str2"]], columns=["a", "b", "b"] ) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 
ec44511fdffec..dcb06f1cf778d 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -547,14 +547,17 @@ def test_astype_assignment(self): ) df = df_orig.copy() - df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) tm.assert_frame_equal(df, expected) df = df_orig.copy() - df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) @@ -562,14 +565,16 @@ def test_astype_assignment(self): # GH5702 (loc) df = df_orig.copy() - df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) expected = DataFrame( [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) tm.assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) expected = DataFrame( [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) @@ -578,12 +583,15 @@ def test_astype_assignment(self): def test_astype_assignment_full_replacements(self): # full replacements / no nans df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) - df.iloc[:, 0] = df["A"].astype(np.int64) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[:, 0] = df["A"].astype(np.int64) expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) df 
= DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) - df.loc[:, "A"] = df["A"].astype(np.int64) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, "A"] = df["A"].astype(np.int64) expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 1cd96bff4177d..1e5e65786a4aa 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -367,7 +367,9 @@ def test_loc_setitem_dtype(self): # GH31340 df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}) cols = ["a", "b", "c"] - df.loc[:, cols] = df.loc[:, cols].astype("float32") + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, cols] = df.loc[:, cols].astype("float32") expected = DataFrame( { @@ -598,7 +600,9 @@ def test_loc_setitem_consistency_empty(self): expected = DataFrame(columns=["x", "y"]) expected["x"] = expected["x"].astype(np.int64) df = DataFrame(columns=["x", "y"]) - df.loc[:, "x"] = 1 + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, "x"] = 1 tm.assert_frame_equal(df, expected) df = DataFrame(columns=["x", "y"]) @@ -629,20 +633,29 @@ def test_loc_setitem_consistency_slice_column_len(self): ] df = DataFrame(values, index=mi, columns=cols) - df.loc[:, ("Respondent", "StartDate")] = to_datetime( - df.loc[:, ("Respondent", "StartDate")] - ) - df.loc[:, ("Respondent", "EndDate")] = to_datetime( - df.loc[:, ("Respondent", "EndDate")] - ) - df.loc[:, ("Respondent", "Duration")] = ( - df.loc[:, ("Respondent", "EndDate")] - - df.loc[:, ("Respondent", "StartDate")] - ) + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, ("Respondent", "StartDate")] = to_datetime( + df.loc[:, ("Respondent", "StartDate")] + ) + with 
tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, ("Respondent", "EndDate")] = to_datetime( + df.loc[:, ("Respondent", "EndDate")] + ) + with tm.assert_produces_warning(None, match=msg): + # Adding a new key -> no warning + df.loc[:, ("Respondent", "Duration")] = ( + df.loc[:, ("Respondent", "EndDate")] + - df.loc[:, ("Respondent", "StartDate")] + ) + + with tm.assert_produces_warning(None, match=msg): + # timedelta64[s] -> float64, so this cannot be done inplace, so + # no warning + df.loc[:, ("Respondent", "Duration")] = df.loc[ + :, ("Respondent", "Duration") + ].astype("timedelta64[s]") - df.loc[:, ("Respondent", "Duration")] = df.loc[ - :, ("Respondent", "Duration") - ].astype("timedelta64[s]") expected = Series( [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") ) @@ -707,7 +720,9 @@ def test_loc_setitem_frame_with_reindex_mixed(self): # GH#40480 df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) df["B"] = "string" - df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") expected = DataFrame({"A": ser}) expected["B"] = "string" @@ -717,7 +732,9 @@ def test_loc_setitem_frame_with_inverted_slice(self): # GH#40480 df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float) df["B"] = "string" - df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3]) tm.assert_frame_equal(df, expected) @@ -890,7 +907,14 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): def test_loc_setitem_missing_columns(self, 
index, box, expected): # GH 29334 df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) - df.loc[index] = box + + warn = None + if isinstance(index[0], slice) and index[0] == slice(None): + warn = FutureWarning + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(warn, match=msg): + df.loc[index] = box tm.assert_frame_equal(df, expected) def test_loc_coercion(self): @@ -1123,6 +1147,8 @@ def test_loc_uint64_disallow_negative(self): # don't wrap around ser.loc[[-1]] + # FIXME: warning issued here is false-positive + @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") def test_loc_setitem_empty_append_expands_rows(self): # GH6173, various appends to an empty dataframe @@ -1134,6 +1160,8 @@ def test_loc_setitem_empty_append_expands_rows(self): df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) + # FIXME: warning issued here is false-positive + @pytest.mark.filterwarnings("ignore:.*will attempt to set.*:FutureWarning") def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): # GH#37932 same as test_loc_setitem_empty_append_expands_rows # but with mixed dtype so we go through take_split_path @@ -1379,7 +1407,10 @@ def test_loc_setitem_single_row_categorical(self): # GH#25495 df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) - df.loc[:, "Alpha"] = categories + + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, "Alpha"] = categories result = df["Alpha"] expected = Series(categories, index=df.index, name="Alpha") diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 8251f09b97062..3c57970d46bc9 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -309,7 +309,9 @@ def test_partial_setting_frame(self): expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 
2, 4])})) df = df_orig.copy() df["B"] = df["B"].astype(np.float64) - df.loc[:, "B"] = df.loc[:, "A"] + msg = "will attempt to set the values inplace instead" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[:, "B"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # single dtype frame, partial setting diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 6c96d10c5911f..40a50c55de2a4 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -168,7 +168,7 @@ def test_append_some_nans(setup_path): # first column df1 = df.copy() - df1.loc[:, "A1"] = np.nan + df1["A1"] = np.nan _maybe_remove(store, "df1") store.append("df1", df1[:10]) store.append("df1", df1[10:]) @@ -176,7 +176,7 @@ def test_append_some_nans(setup_path): # 2nd column df2 = df.copy() - df2.loc[:, "A2"] = np.nan + df2["A2"] = np.nan _maybe_remove(store, "df2") store.append("df2", df2[:10]) store.append("df2", df2[10:]) @@ -184,7 +184,7 @@ def test_append_some_nans(setup_path): # datetimes df3 = df.copy() - df3.loc[:, "E"] = np.nan + df3["E"] = np.nan _maybe_remove(store, "df3") store.append("df3", df3[:10]) store.append("df3", df3[10:]) @@ -622,7 +622,7 @@ def check_col(key, name, size): df_dc["string"] = "foo" df_dc.loc[df_dc.index[4:6], "string"] = np.nan df_dc.loc[df_dc.index[7:9], "string"] = "bar" - df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs() + df_dc[["B", "C"]] = df_dc[["B", "C"]].abs() df_dc["string2"] = "cool" # on-disk operations diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 96882c6811683..0dd1fa175fa3f 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -173,7 +173,7 @@ def test_date_time(datapath): fname, parse_dates=["Date1", "Date2", "DateTime", "DateTimeHi", "Taiw"] ) # GH 19732: Timestamps imported from sas will incur floating point errors - df.iloc[:, 3] = df.iloc[:, 
3].dt.round("us") + df[df.columns[3]] = df.iloc[:, 3].dt.round("us") tm.assert_frame_equal(df, df0) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c21673af2d979..dafe1b4a3607d 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1694,7 +1694,7 @@ def test_mixed_string_strl(self): tm.assert_frame_equal(reread, expected) # Check strl supports all None (null) - output.loc[:, "mixed"] = None + output["mixed"] = None output.to_stata( path, write_index=False, convert_strl=["mixed"], version=117 ) @@ -1706,7 +1706,7 @@ def test_mixed_string_strl(self): def test_all_none_exception(self, version): output = [{"none": "none", "number": 0}, {"none": None, "number": 1}] output = DataFrame(output) - output.loc[:, "none"] = None + output["none"] = None with tm.ensure_clean() as path: with pytest.raises(ValueError, match="Column `none` cannot be exported"): output.to_stata(path, version=version)