diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 1cd9fe65407ba..e318659d9f355 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -89,6 +89,7 @@
     new_block,
     to_native_types,
 )
+from pandas.core.internals.methods import putmask_flexible
 
 if TYPE_CHECKING:
     from pandas import Float64Index
@@ -190,7 +191,7 @@ def __repr__(self) -> str:
     def apply(
         self: T,
         f,
-        align_keys: list[str] | None = None,
+        align_keys: list[str] | None = None,  # not used for ArrayManager
         ignore_failures: bool = False,
         **kwargs,
     ) -> T:
@@ -201,7 +202,6 @@ def apply(
         ----------
         f : str or callable
             Name of the Array method to apply.
-        align_keys: List[str] or None, default None
         ignore_failures: bool, default False
         **kwargs
             Keywords to pass to `f`
@@ -212,32 +212,14 @@ def apply(
         """
         assert "filter" not in kwargs
 
-        align_keys = align_keys or []
         result_arrays: list[np.ndarray] = []
         result_indices: list[int] = []
         # fillna: Series/DataFrame is responsible for making sure value is aligned
 
-        aligned_args = {k: kwargs[k] for k in align_keys}
-
         if f == "apply":
             f = kwargs.pop("func")
 
         for i, arr in enumerate(self.arrays):
-
-            if aligned_args:
-
-                for k, obj in aligned_args.items():
-                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
-                        # The caller is responsible for ensuring that
-                        # obj.axes[-1].equals(self.items)
-                        if obj.ndim == 1:
-                            kwargs[k] = obj.iloc[i]
-                        else:
-                            kwargs[k] = obj.iloc[:, i]._values
-                    else:
-                        # otherwise we have an array-like
-                        kwargs[k] = obj[i]
-
             try:
                 if callable(f):
                     applied = f(arr, **kwargs)
@@ -352,12 +334,28 @@ def putmask(self, mask, new, align: bool = True):
             align_keys = ["mask"]
             new = extract_array(new, extract_numpy=True)
 
-        return self.apply_with_block(
-            "putmask",
-            align_keys=align_keys,
-            mask=mask,
-            new=new,
-        )
+        kwargs = {"mask": mask, "new": new}
+        aligned_kwargs = {k: kwargs[k] for k in align_keys}
+
+        for i, arr in enumerate(self.arrays):
+
+            for k, obj in aligned_kwargs.items():
+                if isinstance(obj, (ABCSeries, ABCDataFrame)):
+                    # The caller is responsible for ensuring that
+                    # obj.axes[-1].equals(self.items)
+                    if obj.ndim == 1:
+                        kwargs[k] = obj._values
+                    else:
+                        kwargs[k] = obj.iloc[:, i]._values
+                else:
+                    # otherwise we have an ndarray
+                    if self.ndim == 2:
+                        kwargs[k] = obj[i]
+
+            new = putmask_flexible(arr, **kwargs)
+            self.arrays[i] = new
+
+        return self
 
     def diff(self: T, n: int, axis: int) -> T:
         if axis == 1:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 46e5b5b9c53ad..33efa8e11ac32 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -110,6 +110,7 @@
     is_empty_indexer,
     is_scalar_indexer,
 )
+from pandas.core.internals.methods import putmask_flexible_ea
 import pandas.core.missing as missing
 
 if TYPE_CHECKING:
@@ -1417,30 +1418,7 @@ def putmask(self, mask, new) -> list[Block]:
         """
         See Block.putmask.__doc__
         """
-        mask = extract_bool_array(mask)
-
-        new_values = self.values
-
-        if mask.ndim == new_values.ndim + 1:
-            # TODO(EA2D): unnecessary with 2D EAs
-            mask = mask.reshape(new_values.shape)
-
-        try:
-            # Caller is responsible for ensuring matching lengths
-            new_values._putmask(mask, new)
-        except TypeError:
-            if not is_interval_dtype(self.dtype):
-                # Discussion about what we want to support in the general
-                # case GH#39584
-                raise
-
-            blk = self.coerce_to_target_dtype(new)
-            if blk.dtype == _dtype_obj:
-                # For now at least, only support casting e.g.
-                # Interval[int64]->Interval[float64],
-                raise
-            return blk.putmask(mask, new)
-
+        new_values = putmask_flexible_ea(self.values, mask, new)
         nb = type(self)(new_values, placement=self._mgr_locs, ndim=self.ndim)
         return [nb]
 
diff --git a/pandas/core/internals/methods.py b/pandas/core/internals/methods.py
new file mode 100644
index 0000000000000..e844c905accd3
--- /dev/null
+++ b/pandas/core/internals/methods.py
@@ -0,0 +1,118 @@
+"""
+Wrappers around array_algos with internals-specific logic
+"""
+from __future__ import annotations
+
+import numpy as np
+
+from pandas.core.dtypes.cast import (
+    can_hold_element,
+    find_common_type,
+    infer_dtype_from,
+)
+from pandas.core.dtypes.common import is_interval_dtype
+from pandas.core.dtypes.generic import (
+    ABCDataFrame,
+    ABCIndex,
+    ABCSeries,
+)
+from pandas.core.dtypes.missing import (
+    is_valid_na_for_dtype,
+    na_value_for_dtype,
+)
+
+from pandas.core.array_algos.putmask import (
+    extract_bool_array,
+    putmask_smart,
+    putmask_without_repeat,
+    setitem_datetimelike_compat,
+    validate_putmask,
+)
+from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
+
+
+def putmask_flexible(array: np.ndarray | ExtensionArray, mask, new):
+    """
+    Putmask implementation for ArrayManager.putmask.
+
+    Flexible version that will upcast if needed.
+    """
+    if isinstance(array, np.ndarray):
+        return putmask_flexible_ndarray(array, mask=mask, new=new)
+    else:
+        return putmask_flexible_ea(array, mask=mask, new=new)
+
+
+def putmask_flexible_ndarray(array: np.ndarray, mask, new):
+    """
+    Putmask implementation for ArrayManager putmask for ndarray.
+
+    Flexible version that will upcast if needed.
+    """
+    mask, noop = validate_putmask(array, mask)
+    assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))
+
+    # if we are passed a scalar None, convert it here
+    if not array.dtype == "object" and is_valid_na_for_dtype(new, array.dtype):
+        new = na_value_for_dtype(array.dtype, compat=False)
+
+    if can_hold_element(array, new):
+        putmask_without_repeat(array, mask, new)
+        return array
+
+    elif noop:
+        return array
+
+    dtype, _ = infer_dtype_from(new)
+    if dtype.kind in ["m", "M"]:
+        array = array.astype(object)
+        # convert to list to avoid numpy coercing datetimelikes to integers
+        new = setitem_datetimelike_compat(array, mask.sum(), new)
+        # putmask_smart below converts it back to array
+        np.putmask(array, mask, new)
+        return array
+
+    new_values = putmask_smart(array, mask, new)
+    return new_values
+
+
+def _coerce_to_target_dtype(array, new):
+    dtype, _ = infer_dtype_from(new, pandas_dtype=True)
+    new_dtype = find_common_type([array.dtype, dtype])
+    return array.astype(new_dtype, copy=False)
+
+
+def putmask_flexible_ea(array: ExtensionArray, mask, new):
+    """
+    Putmask implementation for ArrayManager putmask for EA.
+
+    Flexible version that will upcast if needed.
+    """
+    mask = extract_bool_array(mask)
+
+    if mask.ndim == array.ndim + 1:
+        # TODO(EA2D): unnecessary with 2D EAs
+        mask = mask.reshape(array.shape)
+
+    if isinstance(array, NDArrayBackedExtensionArray):
+        if not can_hold_element(array, new):
+            array = _coerce_to_target_dtype(array, new)
+            return putmask_flexible(array, mask, new)
+
+    try:
+        array._putmask(mask, new)
+    except TypeError:
+        if not is_interval_dtype(array.dtype):
+            # Discussion about what we want to support in the general
+            # case GH#39584
+            raise
+
+        array = _coerce_to_target_dtype(array, new)
+        if array.dtype == np.dtype("object"):
+            # For now at least, only support casting e.g.
+            # Interval[int64]->Interval[float64],
+            raise
+        return putmask_flexible_ea(array, mask, new)
+
+    return array
diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py
index c759cc163106d..3afcf8917cea8 100644
--- a/pandas/tests/internals/test_api.py
+++ b/pandas/tests/internals/test_api.py
@@ -19,6 +19,7 @@ def test_namespace():
         "blocks",
         "concat",
         "managers",
+        "methods",
         "construction",
         "array_manager",
         "base",