Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ArrayManager] Array version of putmask logic #44396

Closed
86 changes: 85 additions & 1 deletion pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,30 @@
)

from pandas.core.dtypes.cast import (
can_hold_element,
convert_scalar_for_putitemlike,
find_common_type,
infer_dtype_from,
)
from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
is_interval_dtype,
is_list_like,
)
from pandas.core.dtypes.missing import isna_compat
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna_compat,
na_value_for_dtype,
)

from pandas.core.arrays import ExtensionArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray


def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
Expand Down Expand Up @@ -225,3 +237,75 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
other = list(other)

return other


def putmask_flexible_ndarray(array: np.ndarray, mask, new):
"""
Putmask implementation for ArrayManager putmask for ndarray.

Flexible version that will upcast if needed.
"""
mask, noop = validate_putmask(array, mask)
assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

# if we are passed a scalar None, convert it here
if not array.dtype == "object" and is_valid_na_for_dtype(new, array.dtype):
new = na_value_for_dtype(array.dtype, compat=False)

if can_hold_element(array, new):
putmask_without_repeat(array, mask, new)
return array

elif noop:
return array

dtype, _ = infer_dtype_from(new)
if dtype.kind in ["m", "M"]:
array = array.astype(object)
# convert to list to avoid numpy coercing datetimelikes to integers
new = setitem_datetimelike_compat(
array, mask.sum(), new # type: ignore[arg-type]
)
# putmask_smart below converts it back to array
np.putmask(array, mask, new)
return array

new_values = putmask_smart(array, mask, new)
return new_values


def _coerce_to_target_dtype(array, new):
dtype, _ = infer_dtype_from(new, pandas_dtype=True)
new_dtype = find_common_type([array.dtype, dtype])
return array.astype(new_dtype, copy=False)


def putmask_flexible_ea(array: ExtensionArray, mask, new):
"""
Putmask implementation for ArrayManager putmask for EA.

Flexible version that will upcast if needed.
"""
mask = extract_bool_array(mask)

if isinstance(array, NDArrayBackedExtensionArray):
if not can_hold_element(array, new):
array = _coerce_to_target_dtype(array, new)
return putmask_flexible_ndarray(array, mask, new)

try:
array._putmask(mask, new)
except TypeError:
if not is_interval_dtype(array.dtype):
# Discussion about what we want to support in the general
# case GH#39584
raise

array = _coerce_to_target_dtype(array, new)
if array.dtype == np.dtype("object"):
# For now at least, only support casting e.g.
# Interval[int64]->Interval[float64],
raise
return putmask_flexible_ea(array, mask, new)

return array
35 changes: 29 additions & 6 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
)

import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
putmask_flexible_ea,
putmask_flexible_ndarray,
)
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.take import take_1d
from pandas.core.arrays import (
Expand Down Expand Up @@ -352,12 +356,31 @@ def putmask(self, mask, new, align: bool = True):
align_keys = ["mask"]
new = extract_array(new, extract_numpy=True)

return self.apply_with_block(
"putmask",
align_keys=align_keys,
mask=mask,
new=new,
)
kwargs = {"mask": mask, "new": new}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not feasible to go through 'apply'?

Copy link
Member Author

@jorisvandenbossche jorisvandenbossche Nov 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. So I forgot that ArrayManager.apply indeed already has this "align" logic as well. However, that is not used at the moment (only putmask and where define align keys, and those both still go through apply_with_block at the moment; this is also confirmed by codecov that it is unused).

But then I would rather remove it from apply, instead of going through apply, which will also prevent duplication. The reason for doing it in putmask: 1) apply is already quite complex (eg also dealing with ignoring failures), so being able to remove the "alignment" handling there would be nice (can still share this later with where), and 2) putmask can update the existing arrays, while apply always constructs a new manager, and 3) for the CoW branch I need to add copy-on-write logic to putmask, which is not needed for apply in general.

aligned_kwargs = {k: kwargs[k] for k in align_keys}

for i, arr in enumerate(self.arrays):

for k, obj in aligned_kwargs.items():
if isinstance(obj, (ABCSeries, ABCDataFrame)):
# The caller is responsible for ensuring that
# obj.axes[-1].equals(self.items)
if obj.ndim == 1:
kwargs[k] = obj._values
else:
kwargs[k] = obj.iloc[:, i]._values
else:
# otherwise we have an ndarray
if self.ndim == 2:
kwargs[k] = obj[i]

if isinstance(arr, np.ndarray):
new = putmask_flexible_ndarray(arr, **kwargs)
else:
new = putmask_flexible_ea(arr, **kwargs)
self.arrays[i] = new

return self

def diff(self: T, n: int, axis: int) -> T:
if axis == 1:
Expand Down