From e778569cb06e9bd0f1876026c96b1cf1ee57b44d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 15:29:54 -0800 Subject: [PATCH] CLN: BlockManager.apply (#29825) --- pandas/core/generic.py | 6 +-- pandas/core/internals/blocks.py | 14 +++---- pandas/core/internals/managers.py | 62 ++++++++++++------------------- 3 files changed, 32 insertions(+), 50 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 48500a9a428d0c..59fd35666efe6f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5571,7 +5571,7 @@ def _to_dict_of_blocks(self, copy=True): for k, v, in self._data.to_dict(copy=copy).items() } - def astype(self, dtype, copy=True, errors="raise"): + def astype(self, dtype, copy: bool_t = True, errors: str = "raise"): """ Cast a pandas object to a specified dtype ``dtype``. @@ -5697,10 +5697,10 @@ def astype(self, dtype, copy=True, errors="raise"): elif is_extension_array_dtype(dtype) and self.ndim > 1: # GH 18099/22869: columnwise conversion to extension dtype # GH 24704: use iloc to handle duplicate column names - results = ( + results = [ self.iloc[:, i].astype(dtype, copy=copy) for i in range(len(self.columns)) - ) + ] else: # else, only a single dtype is given diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8dd39473ee1f43..8a543832b50feb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -523,16 +523,14 @@ def f(mask, val, idx): return self.split_and_operate(None, f, False) - def astype(self, dtype, copy=False, errors="raise", **kwargs): - return self._astype(dtype, copy=copy, errors=errors, **kwargs) - - def _astype(self, dtype, copy=False, errors="raise", **kwargs): - """Coerce to the new type + def astype(self, dtype, copy: bool = False, errors: str = "raise"): + """ + Coerce to the new dtype. Parameters ---------- dtype : str, dtype convertible - copy : boolean, default False + copy : bool, default False copy if indicated errors : str, {'raise', 'ignore'}, default 'ignore' - ``raise`` : allow exceptions to be raised @@ -2142,7 +2140,7 @@ def _maybe_coerce_values(self, values): assert isinstance(values, np.ndarray), type(values) return values - def _astype(self, dtype, **kwargs): + def astype(self, dtype, copy: bool = False, errors: str = "raise"): """ these automatically copy, so copy=True has no effect raise on an except if raise == True @@ -2158,7 +2156,7 @@ def _astype(self, dtype, **kwargs): return self.make_block(values) # delegate - return super()._astype(dtype=dtype, **kwargs) + return super().astype(dtype=dtype, copy=copy, errors=errors) def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0fe95a4b7f3707..9adfe41b68e4ff 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -340,33 +340,20 @@ def _verify_integrity(self): "tot_items: {1}".format(len(self.items), tot_items) ) - def apply( - self, - f, - axes=None, - filter=None, - do_integrity_check=False, - consolidate=True, - **kwargs, - ): + def apply(self, f: str, filter=None, **kwargs): """ - iterate over the blocks, collect and create a new block manager + Iterate over the blocks, collect and create a new BlockManager. Parameters ---------- - f : the callable or function name to operate on at the block level - axes : optional (if not supplied, use self.axes) + f : str + Name of the Block method to apply. filter : list, if supplied, only call the block if the filter is in the block - do_integrity_check : boolean, default False. Do the block manager - integrity check - consolidate: boolean, default True. Join together blocks having same - dtype Returns ------- - Block Manager (new object) - + BlockManager """ result_blocks = [] @@ -380,8 +367,7 @@ def apply( else: kwargs["filter"] = filter_locs - if consolidate: - self._consolidate_inplace() + self._consolidate_inplace() if f == "where": align_copy = True @@ -429,11 +415,8 @@ def apply( result_blocks = _extend_blocks(applied, result_blocks) if len(result_blocks) == 0: - return self.make_empty(axes or self.axes) - bm = type(self)( - result_blocks, axes or self.axes, do_integrity_check=do_integrity_check - ) - bm._consolidate_inplace() + return self.make_empty(self.axes) + bm = type(self)(result_blocks, self.axes, do_integrity_check=False) return bm def quantile( @@ -540,8 +523,8 @@ def get_axe(block, qs, axes): [make_block(values, ndim=1, placement=np.arange(len(values)))], axes[0] ) - def isna(self, func, **kwargs): - return self.apply("apply", func=func, **kwargs) + def isna(self, func): + return self.apply("apply", func=func) def where(self, **kwargs): return self.apply("where", **kwargs) @@ -567,8 +550,8 @@ def fillna(self, **kwargs): def downcast(self, **kwargs): return self.apply("downcast", **kwargs) - def astype(self, dtype, **kwargs): - return self.apply("astype", dtype=dtype, **kwargs) + def astype(self, dtype, copy: bool = False, errors: str = "raise"): + return self.apply("astype", dtype=dtype, copy=copy, errors=errors) def convert(self, **kwargs): return self.apply("convert", **kwargs) @@ -768,14 +751,19 @@ def copy(self, deep=True): """ # this preserves the notion of view copying of axes if deep: - if deep == "all": - copy = lambda ax: ax.copy(deep=True) - else: - copy = lambda ax: ax.view() - new_axes = [copy(ax) for ax in self.axes] + + def copy_func(ax): + if deep == "all": + return ax.copy(deep=True) + else: + return ax.view() + + new_axes = [copy_func(ax) for ax in self.axes] else: new_axes = list(self.axes) - return self.apply("copy", axes=new_axes, deep=deep, do_integrity_check=False) + res = self.apply("copy", deep=deep) + res.axes = new_axes + return res def as_array(self, transpose=False, items=None): """Convert the blockmanager data into an numpy array. @@ -1527,10 +1515,6 @@ def get_slice(self, slobj, axis=0): def index(self): return self.axes[0] - def convert(self, **kwargs): - """ convert the whole block as one """ - return self.apply("convert", **kwargs) - @property def dtype(self): return self._block.dtype