Set values into a single DataFrame column through a manager-level
`column_setitem` method instead of going through an intermediate Series.

Summary of the hunks below:
* pandas/core/frame.py: `DataFrame._set_value` resolves the column and row
  to positions and calls `self._mgr.column_setitem`, then clears the item
  cache; `LossySetitemError` is no longer imported or caught here.
* pandas/core/indexers: the `is_exact_shape_match` helper is removed.
* pandas/core/indexing.py: `_setitem_single_column` reads the column array
  via `_get_column_array` and delegates partial sets to `column_setitem`.
* pandas/core/internals: `column_setitem` is added to both the ArrayManager
  and BlockManager implementations.
* pandas/tests: an identity assertion is dropped, a MultiIndex `.at` setitem
  test is added, and the expected `iat` error message is adjusted when the
  array manager is in use.

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 54ee5ed2f35d1..879a6a2a034d6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -98,7 +98,6 @@
 )
 
 from pandas.core.dtypes.cast import (
-    LossySetitemError,
     can_hold_element,
     construct_1d_arraylike_from_scalar,
     construct_2d_arraylike_from_scalar,
@@ -3945,17 +3944,18 @@ def _set_value(
         """
         try:
             if takeable:
-                series = self._ixs(col, axis=1)
-                loc = index
+                icol = col
+                iindex = cast(int, index)
             else:
-                series = self._get_item_cache(col)
-                loc = self.index.get_loc(index)
-
-            # setitem_inplace will do validation that may raise TypeError,
-            # ValueError, or LossySetitemError
-            series._mgr.setitem_inplace(loc, value)
-
-        except (KeyError, TypeError, ValueError, LossySetitemError):
+                icol = self.columns.get_loc(col)
+                iindex = self.index.get_loc(index)
+            self._mgr.column_setitem(icol, iindex, value)
+            self._clear_item_cache()
+
+        except (KeyError, TypeError, ValueError):
+            # get_loc might raise a KeyError for missing labels (falling back
+            # to (i)loc will do expansion of the index)
+            # column_setitem will do validation that may raise TypeError or ValueError
             # set using a non-recursive method & reset the cache
             if takeable:
                 self.iloc[index, col] = value
diff --git a/pandas/core/indexers/__init__.py b/pandas/core/indexers/__init__.py
index 86ec36144b134..6431f12a08dc8 100644
--- a/pandas/core/indexers/__init__.py
+++ b/pandas/core/indexers/__init__.py
@@ -4,7 +4,6 @@
     check_setitem_lengths,
     deprecate_ndim_indexing,
     is_empty_indexer,
-    is_exact_shape_match,
     is_list_like_indexer,
     is_scalar_indexer,
     is_valid_positional_slice,
@@ -23,7 +22,6 @@
     "check_setitem_lengths",
     "validate_indices",
     "maybe_convert_indices",
-    "is_exact_shape_match",
     "length_of_indexer",
     "deprecate_ndim_indexing",
     "unpack_1tuple",
diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py
index 8c38341e02e34..f098066d1c7d7 100644
--- a/pandas/core/indexers/utils.py
+++ b/pandas/core/indexers/utils.py
@@ -11,10 +11,7 @@
 
 import numpy as np
 
-from pandas._typing import (
-    AnyArrayLike,
-    ArrayLike,
-)
+from pandas._typing import AnyArrayLike
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
@@ -294,27 +291,6 @@ def maybe_convert_indices(indices, n: int, verify: bool = True):
 # Unsorted
 
 
-def is_exact_shape_match(target: ArrayLike, value: ArrayLike) -> bool:
-    """
-    Is setting this value into this target overwriting the entire column?
-
-    Parameters
-    ----------
-    target : np.ndarray or ExtensionArray
-    value : np.ndarray or ExtensionArray
-
-    Returns
-    -------
-    bool
-    """
-    return (
-        len(value.shape) > 0
-        and len(target.shape) > 0
-        and value.shape[0] == target.shape[0]
-        and value.size == target.size
-    )
-
-
 def length_of_indexer(indexer, target=None) -> int:
     """
     Return the expected length of target[indexer]
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 849e7b6921c4f..20ac0fedc28d1 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -53,7 +53,6 @@
 from pandas.core.indexers import (
     check_array_indexer,
     is_empty_indexer,
-    is_exact_shape_match,
     is_list_like_indexer,
     is_scalar_indexer,
     length_of_indexer,
@@ -1951,8 +1950,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         """
         pi = plane_indexer
 
-        ser = self.obj._ixs(loc, axis=1)
-        orig_values = ser._values
+        orig_values = self.obj._get_column_array(loc)
 
         # perform the equivalent of a setitem on the info axis
         # as we have a null slice or a slice with full bounds
@@ -1963,7 +1961,8 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
             pass
         elif (
             is_array_like(value)
-            and is_exact_shape_match(ser, value)
+            and len(value.shape) > 0
+            and self.obj.shape[0] == value.shape[0]
             and not is_empty_indexer(pi)
         ):
             if is_list_like(pi):
@@ -1972,31 +1971,23 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
                 # in case of slice
                 value = value[pi]
         else:
-            # set the item, first attempting to operate inplace, then
-            # falling back to casting if necessary; see
-            # _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace
-
-            orig_values = ser._values
-            ser._mgr = ser._mgr.setitem((pi,), value)
-
-            if ser._values is orig_values:
-                # The setitem happened inplace, so the DataFrame's values
-                # were modified inplace.
-                return
-            self.obj._iset_item(loc, ser)
+            # set value into the column (first attempting to operate inplace, then
+            # falling back to casting if necessary)
+            self.obj._mgr.column_setitem(loc, plane_indexer, value)
+            self.obj._clear_item_cache()
             return
 
         # We will not operate in-place, but will attempt to in the future.
         # To determine whether we need to issue a FutureWarning, see if the
         # setting in-place would work, i.e. behavior will change.
-        warn = can_hold_element(ser._values, value)
+        warn = can_hold_element(orig_values, value)
         # Don't issue the warning yet, as we can still trim a few cases where
         # behavior will not change.
 
         self.obj._iset_item(loc, value)
 
         if warn:
-            new_values = self.obj._ixs(loc, axis=1)._values
+            new_values = self.obj._get_column_array(loc)
 
             if (
                 isinstance(new_values, np.ndarray)
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 0c16ddbbc21ab..ee6c183898079 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -36,6 +36,7 @@
     is_datetime64_ns_dtype,
     is_dtype_equal,
     is_extension_array_dtype,
+    is_integer,
     is_numeric_dtype,
     is_object_dtype,
     is_timedelta64_ns_dtype,
@@ -869,6 +870,21 @@ def iset(
                 self.arrays[mgr_idx] = value_arr
             return
 
+    def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
+        """
+        Set values ("setitem") into a single column (not setting the full column).
+
+        This is a method on the ArrayManager level, to avoid creating an
+        intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
+        """
+        if not is_integer(loc):
+            raise TypeError("The column index should be an integer")
+        arr = self.arrays[loc]
+        mgr = SingleArrayManager([arr], [self._axes[0]])
+        new_mgr = mgr.setitem((idx,), value)
+        # update existing ArrayManager in-place
+        self.arrays[loc] = new_mgr.arrays[0]
+
     def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
         """
         Insert item at selected position.
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index f530c58bbd0c0..7cccc9833de6b 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1188,6 +1188,17 @@ def _iset_single(
         self.blocks = new_blocks
         return
 
+    def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
+        """
+        Set values ("setitem") into a single column (not setting the full column).
+
+        This is a method on the BlockManager level, to avoid creating an
+        intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
+        """
+        col_mgr = self.iget(loc)
+        new_mgr = col_mgr.setitem((idx,), value)
+        self.iset(loc, new_mgr._block.values, inplace=True)
+
     def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
         """
         Insert item at selected position.
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index 75bef4f34c99e..cf6d351aa78a0 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -1099,8 +1099,6 @@ def test_setitem_partial_column_inplace(self, consolidate, using_array_manager):
         # check setting occurred in-place
         tm.assert_numpy_array_equal(zvals, expected.values)
         assert np.shares_memory(zvals, df["z"]._values)
-        if not consolidate:
-            assert df["z"]._values is zvals
 
     def test_setitem_duplicate_columns_not_inplace(self):
         # GH#39510
diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py
index a2f02378d061a..96c73b007cef3 100644
--- a/pandas/tests/indexing/test_at.py
+++ b/pandas/tests/indexing/test_at.py
@@ -10,6 +10,7 @@
     CategoricalDtype,
     CategoricalIndex,
     DataFrame,
+    MultiIndex,
     Series,
     Timestamp,
 )
@@ -96,6 +97,18 @@ def test_at_setitem_categorical_missing(self):
 
         tm.assert_frame_equal(df, expected)
 
+    def test_at_setitem_multiindex(self):
+        df = DataFrame(
+            np.zeros((3, 2), dtype="int64"),
+            columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
+        )
+        df.at[0, "a"] = 10
+        expected = DataFrame(
+            [[10, 10], [0, 0], [0, 0]],
+            columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
+        )
+        tm.assert_frame_equal(df, expected)
+
 
 class TestAtSetItemWithExpansion:
     def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture):
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 3c57970d46bc9..938056902e745 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -266,7 +266,7 @@ def test_partial_setting(self):
         with pytest.raises(IndexError, match=msg):
             s.iat[3] = 5.0
 
-    def test_partial_setting_frame(self):
+    def test_partial_setting_frame(self, using_array_manager):
         df_orig = DataFrame(
             np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
         )
@@ -279,6 +279,8 @@ def test_partial_setting_frame(self):
             df.iloc[4, 2] = 5.0
 
         msg = "index 2 is out of bounds for axis 0 with size 2"
+        if using_array_manager:
+            msg = "list index out of range"
         with pytest.raises(IndexError, match=msg):
             df.iat[4, 2] = 5.0
 