Skip to content

Commit

Permalink
DEPR: df.iloc[:, foo] = bar attempt to set inplace (#45333)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored May 23, 2022
1 parent d2a7eff commit 46bcf37
Show file tree
Hide file tree
Showing 27 changed files with 334 additions and 73 deletions.
1 change: 1 addition & 0 deletions doc/source/user_guide/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ Setting values by position:
Setting by assigning with a NumPy array:

.. ipython:: python
:okwarning:
df.loc[:, "D"] = np.array([5] * len(df))
Expand Down
67 changes: 67 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,73 @@ As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and
raise a ``FutureWarning``. This can be silenced and the previous behavior
retained by specifying ``group_keys=False``.

.. _whatsnew_150.notable_bug_fixes.setitem_column_try_inplace:
_ see also _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace

Try operating inplace when setting values with ``loc`` and ``iloc``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Most of the time setting values with ``frame.iloc`` attempts to set values
in-place, only falling back to inserting a new array if necessary. In the past,
setting entire columns has been an exception to this rule:

.. ipython:: python
values = np.arange(4).reshape(2, 2)
df = pd.DataFrame(values)
ser = df[0]
*Old behavior*:

.. code-block:: ipython
In [3]: df.iloc[:, 0] = np.array([10, 11])
In [4]: ser
Out[4]:
0 0
1 2
Name: 0, dtype: int64
This behavior is deprecated. In a future version, setting an entire column with
``iloc`` will attempt to operate inplace.

*Future behavior*:

.. code-block:: ipython
In [3]: df.iloc[:, 0] = np.array([10, 11])
In [4]: ser
Out[4]:
0 10
1 11
Name: 0, dtype: int64
To get the old behavior, use :meth:`DataFrame.__setitem__` directly:

*Future behavior*:

.. code-block:: ipython
In [5]: df[0] = np.array([21, 31])
In [4]: ser
Out[4]:
0 10
1 11
Name: 0, dtype: int64
In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`:

*Future behavior*:

.. code-block:: ipython
In [5]: df.columns = ["A", "A"]
In [5]: df.isetitem(0, np.array([21, 31]))
In [4]: ser
Out[4]:
0 10
1 11
Name: 0, dtype: int64
.. _whatsnew_150.deprecations.numeric_only_default:

``numeric_only`` default value
Expand Down
25 changes: 24 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3687,6 +3687,29 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
loc = engine.get_loc(index)
return series._values[loc]

def isetitem(self, loc, value) -> None:
"""
Set the given value in the column with position 'loc'.

This is a positional analogue to __setitem__.

Parameters
----------
loc : int or sequence of ints
    Position(s) of the column(s) to set.
value : scalar or arraylike
    Value to store; it is passed through ``_sanitize_column`` before
    being handed to the manager.

Notes
-----
Unlike `frame.iloc[:, loc] = value`, `frame.isetitem(loc, value)` will
_never_ try to set the values in place, but will always insert a new
array.

In cases where `frame.columns` is unique, this is equivalent to
`frame[frame.columns[loc]] = value`.
"""
# Run the value through the frame's own column-sanitizing helper first.
arraylike = self._sanitize_column(value)
# inplace=False is what makes this always insert a new array instead of
# writing into the existing one.
self._iset_item_mgr(loc, arraylike, inplace=False)

def __setitem__(self, key, value):
key = com.apply_if_callable(key, self)

Expand Down Expand Up @@ -5467,7 +5490,7 @@ def _replace_columnwise(
target, value = mapping[ax[i]]
newobj = ser.replace(target, value, regex=regex)

res.iloc[:, i] = newobj
res._iset_item(i, newobj)

if inplace:
return
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2257,7 +2257,13 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default
counts = self.count()
result_ilocs = result.columns.get_indexer_for(cols)
count_ilocs = counts.columns.get_indexer_for(cols)
result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
with warnings.catch_warnings():
# TODO(2.0): once iloc[:, foo] = bar deprecation is enforced,
# this catching will be unnecessary
warnings.filterwarnings(
"ignore", ".*will attempt to set the values inplace.*"
)
result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
return result

@final
Expand Down
48 changes: 41 additions & 7 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1937,24 +1937,25 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
pi = plane_indexer

ser = self.obj._ixs(loc, axis=1)
orig_values = ser._values

# perform the equivalent of a setitem on the info axis
# as we have a null slice or a slice with full bounds
# which means essentially reassign to the columns of a
# multi-dim object
# GH#6149 (null slice), GH#10408 (full bounds)
if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
ser = value
pass
elif (
is_array_like(value)
and is_exact_shape_match(ser, value)
and not is_empty_indexer(pi)
):
if is_list_like(pi):
ser = value[np.argsort(pi)]
value = value[np.argsort(pi)]
else:
# in case of slice
ser = value[pi]
value = value[pi]
else:
# set the item, first attempting to operate inplace, then
# falling back to casting if necessary; see
Expand All @@ -1970,8 +1971,40 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
self.obj._iset_item(loc, ser)
return

# reset the sliced object if unique
self.obj._iset_item(loc, ser)
# We will not operate in-place, but will attempt to in the future.
# To determine whether we need to issue a FutureWarning, see if the
# setting in-place would work, i.e. behavior will change.
warn = can_hold_element(ser._values, value)
# Don't issue the warning yet, as we can still trim a few cases where
# behavior will not change.

self.obj._iset_item(loc, value)

if warn:
new_values = self.obj._ixs(loc, axis=1)._values

if (
isinstance(new_values, np.ndarray)
and isinstance(orig_values, np.ndarray)
and np.shares_memory(new_values, orig_values)
):
# TODO: get something like tm.shares_memory working?
# The values were set inplace after all, no need to warn,
# e.g. test_rename_nocopy
pass
else:
warnings.warn(
"In a future version, `df.iloc[:, i] = newvals` will attempt "
"to set the values inplace instead of always setting a new "
"array. To retain the old behavior, use either "
"`df[df.columns[i]] = newvals` or, if columns are non-unique, "
"`df.isetitem(i, newvals)`",
FutureWarning,
stacklevel=find_stack_level(),
)
# TODO: how to get future behavior?
# TODO: what if we got here indirectly via loc?
return

def _setitem_single_block(self, indexer, value, name: str):
"""
Expand All @@ -1981,7 +2014,6 @@ def _setitem_single_block(self, indexer, value, name: str):

info_axis = self.obj._info_axis_number
item_labels = self.obj._get_axis(info_axis)

if isinstance(indexer, tuple):

# if we are setting on the info axis ONLY
Expand All @@ -1996,7 +2028,9 @@ def _setitem_single_block(self, indexer, value, name: str):
if len(item_labels.get_indexer_for([col])) == 1:
# e.g. test_loc_setitem_empty_append_expands_rows
loc = item_labels.get_loc(col)
self.obj._iset_item(loc, value)
# Go through _setitem_single_column to get
# FutureWarning if relevant.
self._setitem_single_column(loc, value, indexer[0])
return

indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align
Expand Down
27 changes: 25 additions & 2 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
IntervalDtype,
PandasDtype,
PeriodDtype,
)

import pandas as pd
import pandas._testing as tm
from pandas.tests.extension.base.base import BaseExtensionTests
Expand Down Expand Up @@ -361,6 +368,11 @@ def test_setitem_frame_2d_values(self, data):
# GH#44514
df = pd.DataFrame({"A": data})

# These dtypes have non-broken implementations of _can_hold_element
has_can_hold_element = isinstance(
data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
)

# Avoiding using_array_manager fixture
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
Expand All @@ -369,13 +381,24 @@ def test_setitem_frame_2d_values(self, data):

orig = df.copy()

df.iloc[:] = df
msg = "will attempt to set the values inplace instead"
warn = None
if has_can_hold_element and not isinstance(data.dtype, PandasDtype):
# PandasDtype excluded because it isn't *really* supported.
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
df.iloc[:] = df
self.assert_frame_equal(df, orig)

df.iloc[:-1] = df.iloc[:-1]
self.assert_frame_equal(df, orig)

df.iloc[:] = df.values
if isinstance(data.dtype, DatetimeTZDtype):
# no warning bc df.values casts to object dtype
warn = None
with tm.assert_produces_warning(warn, match=msg):
df.iloc[:] = df.values
self.assert_frame_equal(df, orig)
if not using_array_manager:
# GH#33457 Check that this setting occurred in-place
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/frame/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def test_loc_setitem_multiindex_columns(self, consolidate):
A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()

A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
msg = "will attempt to set the values inplace instead"
with tm.assert_produces_warning(FutureWarning, match=msg):
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)

assert (A.dtypes == np.float32).all()

A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
Expand Down Expand Up @@ -129,7 +132,10 @@ def test_iloc_setitem_unnecesssary_float_upcasting():
orig = df.copy()

values = df[0].values.reshape(2, 1)
df.iloc[:, 0:1] = values

msg = "will attempt to set the values inplace instead"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.iloc[:, 0:1] = values

tm.assert_frame_equal(df, orig)

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ def test_getitem_setitem_boolean_multi(self):
expected.loc[[0, 2], [1]] = 5
tm.assert_frame_equal(df, expected)

def test_getitem_setitem_float_labels(self):
def test_getitem_setitem_float_labels(self, using_array_manager):
index = Index([1.5, 2, 3, 4, 5])
df = DataFrame(np.random.randn(5, 5), index=index)

Expand Down Expand Up @@ -771,7 +771,10 @@ def test_getitem_setitem_float_labels(self):
assert len(result) == 5

cp = df.copy()
cp.loc[1.0:5.0] = 0
warn = FutureWarning if using_array_manager else None
msg = "will attempt to set the values inplace"
with tm.assert_produces_warning(warn, match=msg):
cp.loc[1.0:5.0] = 0
result = cp.loc[1.0:5.0]
assert (result == 0).values.all()

Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,10 +399,14 @@ def test_setitem_frame_length_0_str_key(self, indexer):

def test_setitem_frame_duplicate_columns(self, using_array_manager):
# GH#15695
warn = FutureWarning if using_array_manager else None
msg = "will attempt to set the values inplace"

cols = ["A", "B", "C"] * 2
df = DataFrame(index=range(3), columns=cols)
df.loc[0, "A"] = (0, 3)
df.loc[:, "B"] = (1, 4)
with tm.assert_produces_warning(warn, match=msg):
df.loc[:, "B"] = (1, 4)
df["C"] = (2, 5)
expected = DataFrame(
[
Expand Down Expand Up @@ -769,7 +773,9 @@ def test_setitem_string_column_numpy_dtype_raising(self):
def test_setitem_empty_df_duplicate_columns(self):
# GH#38521
df = DataFrame(columns=["a", "b", "b"], dtype="float64")
df.loc[:, "a"] = list(range(2))
msg = "will attempt to set the values inplace instead"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.loc[:, "a"] = list(range(2))
expected = DataFrame(
[[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"]
)
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def test_where_bug_transposition(self):
result = a.where(do_not_replace, b)
tm.assert_frame_equal(result, expected)

def test_where_datetime(self):
def test_where_datetime(self, using_array_manager):

# GH 3311
df = DataFrame(
Expand All @@ -385,7 +385,11 @@ def test_where_datetime(self):

expected = df.copy()
expected.loc[[0, 1], "A"] = np.nan
expected.loc[:, "C"] = np.nan

warn = FutureWarning if using_array_manager else None
msg = "will attempt to set the values inplace"
with tm.assert_produces_warning(warn, match=msg):
expected.loc[:, "C"] = np.nan
tm.assert_frame_equal(result, expected)

def test_where_none(self):
Expand Down Expand Up @@ -513,7 +517,7 @@ def test_where_axis_with_upcast(self):
assert return_value is None
tm.assert_frame_equal(result, expected)

def test_where_axis_multiple_dtypes(self):
def test_where_axis_multiple_dtypes(self, using_array_manager):
# Multiple dtypes (=> multiple Blocks)
df = pd.concat(
[
Expand Down Expand Up @@ -569,7 +573,10 @@ def test_where_axis_multiple_dtypes(self):

d2 = df.copy().drop(1, axis=1)
expected = df.copy()
expected.loc[:, 1] = np.nan
warn = FutureWarning if using_array_manager else None
msg = "will attempt to set the values inplace"
with tm.assert_produces_warning(warn, match=msg):
expected.loc[:, 1] = np.nan

result = df.where(mask, d2)
tm.assert_frame_equal(result, expected)
Expand Down
Loading

0 comments on commit 46bcf37

Please sign in to comment.