From 9339f15f9999764b5e6e043a54272ad43e6f44a9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 16 Jul 2022 11:32:42 +0200 Subject: [PATCH] fixup after updata main and column_setitem + iloc inplace setitem changes (gh-45333) --- pandas/core/frame.py | 13 ------------- pandas/tests/copy_view/test_indexing.py | 8 ++++---- pandas/tests/copy_view/test_setitem.py | 10 ++++++---- pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/indexing/test_iloc.py | 9 --------- pandas/tests/indexing/test_loc.py | 16 ++++------------ 6 files changed, 15 insertions(+), 43 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9faf8b3017482..1236a7222b502 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3941,19 +3941,6 @@ def _set_value( Sets whether or not index/col interpreted as indexers """ try: - if ( - get_option("mode.copy_on_write") - and get_option("mode.data_manager") == "block" - ): - if not takeable: - icol = self.columns.get_loc(col) - index = self.index.get_loc(index) - self._mgr.column_setitem(icol, index, value) - else: - self._mgr.column_setitem(col, index, value) - self._clear_item_cache() - return - if takeable: icol = col iindex = cast(int, index) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 6eb7237c4f41c..d917a3c79aa97 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -150,7 +150,7 @@ def test_subset_column_slice(using_copy_on_write, using_array_manager, dtype): ids=["slice", "mask", "array"], ) def test_subset_loc_rows_columns( - dtype, row_indexer, column_indexer, using_array_manager + dtype, row_indexer, column_indexer, using_array_manager, using_copy_on_write ): # Case: taking a subset of the rows+columns of a DataFrame using .loc # + afterwards modifying the subset @@ -177,7 +177,7 @@ def test_subset_loc_rows_columns( if ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) - and (using_array_manager or dtype == "int64") + and (using_array_manager or (dtype == "int64" and not using_copy_on_write)) ): df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) @@ -197,7 +197,7 @@ def test_subset_loc_rows_columns( ids=["slice", "mask", "array"], ) def test_subset_iloc_rows_columns( - dtype, row_indexer, column_indexer, using_array_manager + dtype, row_indexer, column_indexer, using_array_manager, using_copy_on_write ): # Case: taking a subset of the rows+columns of a DataFrame using .iloc # + afterwards modifying the subset @@ -224,7 +224,7 @@ def test_subset_iloc_rows_columns( if ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) - and (using_array_manager or dtype == "int64") + and (using_array_manager or (dtype == "int64" and not using_copy_on_write)) ): df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py index dd42983179806..9e0d350dde0de 100644 --- a/pandas/tests/copy_view/test_setitem.py +++ b/pandas/tests/copy_view/test_setitem.py @@ -34,8 +34,9 @@ def test_set_column_with_series(using_copy_on_write): df["c"] = ser if using_copy_on_write: - # with CoW we can delay the copy - assert np.shares_memory(df["c"].values, ser.values) + # TODO(CoW) with CoW we can delay the copy + # assert np.shares_memory(df["c"].values, ser.values) + assert not np.shares_memory(df["c"].values, ser.values) else: # the series data is copied assert not np.shares_memory(df["c"].values, ser.values) @@ -78,8 +79,9 @@ def test_set_columns_with_dataframe(using_copy_on_write): df[["c", "d"]] = df2 if using_copy_on_write: - # with CoW we can delay the copy - assert np.shares_memory(df["c"].values, df2["c"].values) + # TODO(CoW) with CoW we can delay the copy + # assert np.shares_memory(df["c"].values, df2["c"].values) + assert not np.shares_memory(df["c"].values, df2["c"].values) else: # the data is copied assert not np.shares_memory(df["c"].values, df2["c"].values) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index ea320906ec37d..627e85a2b2f0c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -776,7 +776,7 @@ def test_setitem_string_column_numpy_dtype_raising(self): expected = DataFrame([[1, 2, 5], [3, 4, 6]], columns=[0, 1, "0 - Name"]) tm.assert_frame_equal(df, expected) - def test_setitem_empty_df_duplicate_columns(self): + def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write): # GH#38521 df = DataFrame(columns=["a", "b", "b"], dtype="float64") msg = "will attempt to set the values inplace instead" diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 0b818a83df8e4..2301a326e464d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -107,21 +107,12 @@ def test_iloc_setitem_fullcol_categorical( df.iloc[0, 0] = "gamma" assert cat[0] != "gamma" - # TODO with mixed dataframe ("split" path), we always overwrite the column - if using_copy_on_write and indexer == tm.loc: - # TODO(ArrayManager) with loc, slice(3) gets converted into slice(0, 3) - # which is then considered as "full" slice and does overwrite. For iloc - # this conversion is not done, and so it doesn't overwrite - overwrite = overwrite or (isinstance(key, slice) and key == slice(3)) - frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) df = frame.copy() orig_vals = df.values with tm.assert_produces_warning(FutureWarning, match=msg): indexer(df)[key, 0] = cat expected = DataFrame({0: cat, 1: range(3)}) - if using_copy_on_write and not overwrite: - expected[0] = expected[0].astype(object) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("box", [array, Series]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 20aad25a3381d..9f2ede7ecc805 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -716,34 +716,26 @@ def test_loc_setitem_frame_with_reindex(self): expected = DataFrame({"A": ser}) tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame_with_reindex_mixed(self, using_copy_on_write): + def test_loc_setitem_frame_with_reindex_mixed(self): # GH#40480 df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) df["B"] = "string" msg = "will attempt to set the values inplace instead" with tm.assert_produces_warning(FutureWarning, match=msg): df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") - ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float) - if not using_copy_on_write: - # For default BlockManager case, this takes the "split" path, - # which still overwrites the column - ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") + ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") expected = DataFrame({"A": ser}) expected["B"] = "string" tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame_with_inverted_slice(self, using_copy_on_write): + def test_loc_setitem_frame_with_inverted_slice(self): # GH#40480 df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float) df["B"] = "string" msg = "will attempt to set the values inplace instead" with tm.assert_produces_warning(FutureWarning, match=msg): df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") - expected = DataFrame({"A": [3.0, 2.0, 1.0], "B": "string"}, index=[1, 2, 3]) - if not using_copy_on_write: - # For default BlockManager case, this takes the "split" path, - # which still overwrites the column - expected["A"] = expected["A"].astype("int64") + expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3]) tm.assert_frame_equal(df, expected) def test_loc_setitem_empty_frame(self):