Skip to content

Commit

Permalink
BUG: df.iloc[:, 0] = df.iloc[::-1, 0] not setting inplace for EAs (#4…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Mar 1, 2022
1 parent 6caefb1 commit 3960157
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 33 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ Indexing
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
- Bug in :meth:`Series.align` does not create :class:`MultiIndex` with union of levels when both MultiIndexes intersections are identical (:issue:`45224`)
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`)
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
- Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`)
- Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`)
Expand Down
14 changes: 1 addition & 13 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1636,21 +1636,9 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
return self.values

def set_inplace(self, locs, values: ArrayLike) -> None:
# NB: This is a misnomer, is supposed to be inplace but is not,
# see GH#33457
# When an ndarray, we should have locs.tolist() == [0]
# When a BlockPlacement we should have list(locs) == [0]

# error: Incompatible types in assignment (expression has type
# "Union[ExtensionArray, ndarray[Any, Any]]", variable has type
# "ExtensionArray")
self.values = values # type: ignore[assignment]
try:
# TODO(GH33457) this can be removed
self._cache.clear()
except AttributeError:
# _cache not yet initialized
pass
self.values[:] = values

def _maybe_squeeze_arg(self, arg):
"""
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,14 @@ def test_setitem_series(self, data, full_indexer):
def test_setitem_frame_2d_values(self, data):
# GH#44514
df = pd.DataFrame({"A": data})

# Avoiding using_array_manager fixture
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)

df = pd.DataFrame({"A": data})
blk_data = df._mgr.arrays[0]

orig = df.copy()

df.iloc[:] = df
Expand All @@ -370,6 +378,10 @@ def test_setitem_frame_2d_values(self, data):

df.iloc[:] = df.values
self.assert_frame_equal(df, orig)
if not using_array_manager:
# GH#33457 Check that this setting occurred in-place
# FIXME(ArrayManager): this should work there too
assert df._mgr.arrays[0] is blk_data

df.iloc[:-1] = df.values[:-1]
self.assert_frame_equal(df, orig)
Expand Down
39 changes: 22 additions & 17 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2519,6 +2519,7 @@ def test_dict_nocopy(
return

c = pd.array([1, 2], dtype=any_numeric_ea_dtype)
c_orig = c.copy()
df = DataFrame({"a": a, "b": b, "c": c}, copy=copy)

def get_base(obj):
Expand All @@ -2530,9 +2531,19 @@ def get_base(obj):
else:
raise TypeError

def check_views():
def check_views(c_only: bool = False):
# written to work for either BlockManager or ArrayManager

# Check that the underlying data behind df["c"] is still `c`
# after setting with iloc. Since we don't know which entry in
# df._mgr.arrays corresponds to df["c"], we just check that exactly
# one of these arrays is `c`. GH#38939
assert sum(x is c for x in df._mgr.arrays) == 1
if c_only:
# If we ever stop consolidating in setitem_with_indexer,
# this will become unnecessary.
return

assert (
sum(
get_base(x) is a
Expand All @@ -2554,23 +2565,18 @@ def check_views():
# constructor preserves views
check_views()

# TODO: most of the rest of this test belongs in indexing tests
df.iloc[0, 0] = 0
df.iloc[0, 1] = 0
if not copy:
# Check that the underlying data behind df["c"] is still `c`
# after setting with iloc. Since we don't know which entry in
# df._mgr.arrays corresponds to df["c"], we just check that exactly
# one of these arrays is `c`. GH#38939
assert sum(x is c for x in df._mgr.arrays) == 1
# TODO: we can call check_views if we stop consolidating
# in setitem_with_indexer
check_views(True)

# FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve
# view, so we have to check in the other direction
# df.iloc[0, 2] = 0
# if not copy:
# check_views()
c[0] = 0
df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype)
assert df.dtypes.iloc[2] == c.dtype
if not copy:
check_views(True)

if copy:
if a.dtype.kind == "M":
Expand All @@ -2580,14 +2586,13 @@ def check_views():
assert a[0] == a.dtype.type(1)
assert b[0] == b.dtype.type(3)
# FIXME(GH#35417): enable after GH#35417
# assert c[0] == 1
assert df.iloc[0, 2] == 1
assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c
else:
# TODO: we can call check_views if we stop consolidating
# in setitem_with_indexer
# FIXME(GH#35417): enable after GH#35417
# assert b[0] == 0
assert df.iloc[0, 2] == 0
assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c
# TODO: we can check b[0] == 0 if we stop consolidating in
# setitem_with_indexer (except for datetimelike?)

def test_from_series_with_name_with_columns(self):
# GH 7893
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,16 +875,19 @@ def test_series_indexing_zerodim_np_array(self):
result = s.iloc[np.array(0)]
assert result == 1

@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457")
@td.skip_array_manager_not_yet_implemented
def test_iloc_setitem_categorical_updates_inplace(self):
# Mixed dtype ensures we go through take_split_path in setitem_with_indexer
cat = Categorical(["A", "B", "C"])
df = DataFrame({1: cat, 2: [1, 2, 3]})
df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False)

assert tm.shares_memory(df[1], cat)

# This should modify our original values in-place
df.iloc[:, 0] = cat[::-1]
assert tm.shares_memory(df[1], cat)

expected = Categorical(["C", "B", "A"])
expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"])
tm.assert_categorical_equal(cat, expected)

def test_iloc_with_boolean_operation(self):
Expand Down

0 comments on commit 3960157

Please sign in to comment.