From 2dfeffbec2ebc903ed0706b2824baaab827edd35 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 Mar 2021 14:48:35 +0100 Subject: [PATCH 1/3] [ArrayManager] Fix groupby libreduction Series(Bin)Grouper --- pandas/_libs/reduction.pyx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 4d0bd4744be5d..0e952bff6100c 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -59,6 +59,7 @@ cdef class _BaseGrouper: cached_typ = self.typ( vslider.buf, dtype=vslider.buf.dtype, index=cached_ityp, name=self.name ) + self.has_block = hasattr(cached_typ._mgr, "_block") else: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference @@ -66,9 +67,12 @@ cdef class _BaseGrouper: object.__setattr__(cached_ityp, '_index_data', islider.buf) cached_ityp._engine.clear_mapping() cached_ityp._cache.clear() # e.g. inferred_freq must go - object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf) - object.__setattr__(cached_typ._mgr._block, 'mgr_locs', - slice(len(vslider.buf))) + if self.has_block: + object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf) + object.__setattr__(cached_typ._mgr._block, 'mgr_locs', + slice(len(vslider.buf))) + else: + cached_typ._mgr.arrays[0] = vslider.buf object.__setattr__(cached_typ, '_index', cached_ityp) object.__setattr__(cached_typ, 'name', self.name) @@ -108,6 +112,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): cdef public: ndarray arr, index, dummy_arr, dummy_index object values, f, bins, typ, ityp, name + bint has_block def __init__(self, object series, object f, object bins): @@ -201,6 +206,7 @@ cdef class SeriesGrouper(_BaseGrouper): cdef public: ndarray arr, index, dummy_arr, dummy_index object f, labels, values, typ, ityp, name + bint has_block def __init__(self, object series, object f, object labels, Py_ssize_t ngroups): From adf5e8160005252c132d46c51235bc77747b6edc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 Mar 2021 15:58:07 +0100 Subject: [PATCH 2/3] simplify with SingleManager.set_values --- pandas/_libs/reduction.pyx | 10 +--------- pandas/core/internals/managers.py | 1 + 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 0e952bff6100c..5649d1378cda3 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -59,7 +59,6 @@ cdef class _BaseGrouper: cached_typ = self.typ( vslider.buf, dtype=vslider.buf.dtype, index=cached_ityp, name=self.name ) - self.has_block = hasattr(cached_typ._mgr, "_block") else: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference @@ -67,12 +66,7 @@ cdef class _BaseGrouper: object.__setattr__(cached_ityp, '_index_data', islider.buf) cached_ityp._engine.clear_mapping() cached_ityp._cache.clear() # e.g. inferred_freq must go - if self.has_block: - object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf) - object.__setattr__(cached_typ._mgr._block, 'mgr_locs', - slice(len(vslider.buf))) - else: - cached_typ._mgr.arrays[0] = vslider.buf + cached_typ._mgr.set_values(vslider.buf) object.__setattr__(cached_typ, '_index', cached_ityp) object.__setattr__(cached_typ, 'name', self.name) @@ -112,7 +106,6 @@ cdef class SeriesBinGrouper(_BaseGrouper): cdef public: ndarray arr, index, dummy_arr, dummy_index object values, f, bins, typ, ityp, name - bint has_block def __init__(self, object series, object f, object bins): @@ -206,7 +199,6 @@ cdef class SeriesGrouper(_BaseGrouper): cdef public: ndarray arr, index, dummy_arr, dummy_index object f, labels, values, typ, ityp, name - bint has_block def __init__(self, object series, object f, object labels, Py_ssize_t ngroups): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2ad7471d6f086..dd9b5ed194638 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1654,6 +1654,7 @@ def set_values(self, values: ArrayLike): valid for the current Block/SingleBlockManager (length, dtype, etc). """ self.blocks[0].values = values + self.blocks[0].mgr_locs = slice(len(values)) # -------------------------------------------------------------------- From 450e8004d00bf68bdc7064dbc2d6b317282a2f66 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 Mar 2021 16:44:42 +0100 Subject: [PATCH 3/3] set to _mgr_locs --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dd9b5ed194638..09559e571d5ee 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1654,7 +1654,7 @@ def set_values(self, values: ArrayLike): valid for the current Block/SingleBlockManager (length, dtype, etc). """ self.blocks[0].values = values - self.blocks[0].mgr_locs = slice(len(values)) + self.blocks[0]._mgr_locs = libinternals.BlockPlacement(slice(len(values))) # --------------------------------------------------------------------