Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: first try inplace setitem for .at indexer #49772

Merged
merged 9 commits into from
Dec 9, 2022
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Fixed regressions
from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`)
- Fixed regression in :func:`date_range` returning an invalid set of periods for ``CustomBusinessDay`` frequency and ``start`` date with timezone (:issue:`49441`)
- Fixed performance regression in groupby operations (:issue:`49676`)
-
- Fixed performance regression in the :meth:`~DataFrame.at` indexer (:issue:`49771`)

.. ---------------------------------------------------------------------------
.. _whatsnew_152.bug_fixes:
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
)

from pandas.core.dtypes.cast import (
LossySetitemError,
can_hold_element,
construct_1d_arraylike_from_scalar,
construct_2d_arraylike_from_scalar,
Expand Down Expand Up @@ -4135,10 +4136,10 @@ def _set_value(
else:
icol = self.columns.get_loc(col)
iindex = self.index.get_loc(index)
self._mgr.column_setitem(icol, iindex, value)
self._mgr.column_setitem(icol, iindex, value, inplace=True)
self._clear_item_cache()

except (KeyError, TypeError, ValueError):
except (KeyError, TypeError, ValueError, LossySetitemError):
jorisvandenbossche marked this conversation as resolved.
Show resolved Hide resolved
# get_loc might raise a KeyError for missing labels (falling back
# to (i)loc will do expansion of the index)
# column_setitem will do validation that may raise TypeError, ValueError, or LossySetitemError
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,9 @@ def iset(
self.arrays[mgr_idx] = value_arr
return

def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
def column_setitem(
self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False
) -> None:
"""
Set values ("setitem") into a single column (not setting the full column).

Expand All @@ -870,9 +872,12 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None
raise TypeError("The column index should be an integer")
arr = self.arrays[loc]
mgr = SingleArrayManager([arr], [self._axes[0]])
new_mgr = mgr.setitem((idx,), value)
# update existing ArrayManager in-place
self.arrays[loc] = new_mgr.arrays[0]
if inplace:
mgr.setitem_inplace(idx, value)
else:
new_mgr = mgr.setitem((idx,), value)
# update existing ArrayManager in-place
self.arrays[loc] = new_mgr.arrays[0]

def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
"""
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,9 @@ def _iset_single(
self._clear_reference_block(blkno)
return

def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
def column_setitem(
self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False
) -> None:
"""
Set values ("setitem") into a single column (not setting the full column).

Expand All @@ -1362,8 +1364,11 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None
# this manager is only created temporarily to mutate the values in place
# so don't track references, otherwise the `setitem` would perform CoW again
col_mgr = self.iget(loc, track_ref=False)
new_mgr = col_mgr.setitem((idx,), value)
self.iset(loc, new_mgr._block.values, inplace=True)
if inplace:
col_mgr.setitem_inplace(idx, value)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
else:
new_mgr = col_mgr.setitem((idx,), value)
self.iset(loc, new_mgr._block.values, inplace=True)

def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
"""
Expand Down