BLD: remove blockslider pandas-dev#34014
fangchenli committed Oct 9, 2020
1 parent 345efdd commit a82d377
Showing 1 changed file with 40 additions and 130 deletions.
170 changes: 40 additions & 130 deletions pandas/_libs/reduction.pyx
@@ -1,24 +1,20 @@
from copy import copy

from cython import Py_ssize_t

from libc.stdlib cimport free, malloc

import numpy as np

cimport numpy as cnp
from numpy cimport int64_t, ndarray

cnp.import_array()

from pandas._libs cimport util
from pandas._libs.util cimport is_array

from pandas._libs.lib import is_scalar, maybe_convert_objects


cpdef check_result_array(object obj, Py_ssize_t cnt):

if (util.is_array(obj) or
if (is_array(obj) or
(isinstance(obj, list) and len(obj) == cnt) or
getattr(obj, 'shape', None) == (cnt,)):
raise ValueError('Must produce aggregated value')
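
For reference, a minimal pure-Python sketch of the guard above (the sample scalar and the group length cnt=4 are illustrative, not from the diff): an aggregation UDF must reduce each group to a scalar, so an array-like whose length matches the group is rejected.

import numpy as np

def check_result_array_sketch(obj, cnt):
    # Reject results that are still array-like of the group's length.
    if (isinstance(obj, np.ndarray)
            or (isinstance(obj, list) and len(obj) == cnt)
            or getattr(obj, 'shape', None) == (cnt,)):
        raise ValueError('Must produce aggregated value')

check_result_array_sketch(3.5, cnt=4)          # scalar result: passes
# check_result_array_sketch([1, 2, 3, 4], 4)   # would raise ValueError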
@@ -33,7 +29,7 @@ cdef class _BaseGrouper:
if (dummy.dtype != self.arr.dtype
and values.dtype != self.arr.dtype):
raise ValueError('Dummy array must be same dtype')
if util.is_array(values) and not values.flags.contiguous:
if is_array(values) and not values.flags.contiguous:
# e.g. Categorical has no `flags` attribute
values = values.copy()
index = dummy.index.values
@@ -106,7 +102,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
self.f = f

values = series.values
if util.is_array(values) and not values.flags.c_contiguous:
if is_array(values) and not values.flags.c_contiguous:
# e.g. Categorical has no `flags` attribute
values = values.copy('C')
self.arr = values
@@ -204,7 +200,7 @@ cdef class SeriesGrouper(_BaseGrouper):
self.f = f

values = series.values
if util.is_array(values) and not values.flags.c_contiguous:
if is_array(values) and not values.flags.c_contiguous:
# e.g. Categorical has no `flags` attribute
values = values.copy('C')
self.arr = values
@@ -288,9 +284,9 @@ cpdef inline extract_result(object res, bint squeeze=True):
res = res._values
if squeeze and res.ndim == 1 and len(res) == 1:
res = res[0]
if hasattr(res, 'values') and util.is_array(res.values):
if hasattr(res, 'values') and is_array(res.values):
res = res.values
if util.is_array(res):
if is_array(res):
if res.ndim == 0:
res = res.item()
elif squeeze and res.ndim == 1 and len(res) == 1:
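
A rough pure-Python sketch of the unwrapping extract_result performs, based only on the lines visible in this hunk (the truncated leading condition is not reproduced; the Series below is illustrative):

import numpy as np
import pandas as pd

def extract_result_sketch(res, squeeze=True):
    # Unwrap a pandas object to its backing ndarray, then squeeze to a scalar.
    if hasattr(res, 'values') and isinstance(res.values, np.ndarray):
        res = res.values
    if isinstance(res, np.ndarray):
        if res.ndim == 0:
            res = res.item()                    # 0-d array -> scalar
        elif squeeze and res.ndim == 1 and len(res) == 1:
            res = res[0]                        # length-1 array -> scalar
    return res

extract_result_sketch(pd.Series([7]))          # -> 7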
@@ -304,7 +300,7 @@ cdef class Slider:
"""
cdef:
ndarray values, buf
Py_ssize_t stride, orig_len, orig_stride
Py_ssize_t stride, orig_len
char *orig_data

def __init__(self, ndarray values, ndarray buf):
@@ -320,7 +316,6 @@

self.orig_data = self.buf.data
self.orig_len = self.buf.shape[0]
self.orig_stride = self.buf.strides[0]

self.buf.data = self.values.data
self.buf.strides[0] = self.stride
@@ -333,10 +328,8 @@
self.buf.shape[0] = end - start

cdef reset(self):

self.buf.shape[0] = self.orig_len
self.buf.data = self.orig_data
self.buf.strides[0] = self.orig_stride
self.buf.shape[0] = self.orig_len


class InvalidApply(Exception):
@@ -346,7 +339,6 @@ class InvalidApply(Exception):
def apply_frame_axis0(object frame, object f, object names,
const int64_t[:] starts, const int64_t[:] ends):
cdef:
BlockSlider slider
Py_ssize_t i, n = len(starts)
list results
object piece
@@ -357,124 +349,42 @@ def apply_frame_axis0(object frame, object f, object names,

results = []

slider = BlockSlider(frame)

mutated = False
item_cache = slider.dummy._item_cache
try:
for i in range(n):
slider.move(starts[i], ends[i])
item_cache = frame._item_cache

item_cache.clear() # ugh
chunk = slider.dummy
object.__setattr__(chunk, 'name', names[i])
for i in range(n):
item_cache.clear() # ugh
chunk = frame[starts[i]:ends[i]]
object.__setattr__(chunk, 'name', names[i])

try:
piece = f(chunk)
except Exception:
# We can't be more specific without knowing something about `f`
raise InvalidApply('Let this error raise above us')
try:
piece = f(chunk)
except Exception:
# We can't be more specific without knowing something about `f`
raise InvalidApply('Let this error raise above us')

# Need to infer if low level index slider will cause segfaults
require_slow_apply = i == 0 and piece is chunk
# Need to infer if low level index slider will cause segfaults
require_slow_apply = i == 0 and piece is chunk
try:
if not piece.index is chunk.index:
mutated = True
except AttributeError:
# `piece` might not have an index, could be e.g. an int
pass

if not is_scalar(piece):
# Need to copy data to avoid appending references
try:
if not piece.index is chunk.index:
mutated = True
except AttributeError:
# `piece` might not have an index, could be e.g. an int
pass

if not is_scalar(piece):
# Need to copy data to avoid appending references
try:
piece = piece.copy(deep="all")
except (TypeError, AttributeError):
piece = copy(piece)

results.append(piece)

# If the data was modified inplace we need to
# take the slow path to not risk segfaults
# we have already computed the first piece
if require_slow_apply:
break
finally:
slider.reset()
piece = piece.copy(deep="all")
except (TypeError, AttributeError):
piece = copy(piece)

return results, mutated
results.append(piece)

# If the data was modified inplace we need to
# take the slow path to not risk segfaults
# we have already computed the first piece
if require_slow_apply:
break

cdef class BlockSlider:
"""
Only capable of sliding on axis=0
"""

cdef public:
object frame, dummy, index
int nblocks
Slider idx_slider
list blocks

cdef:
char **base_ptrs

def __init__(self, object frame):
cdef:
Py_ssize_t i
object b

self.frame = frame
self.dummy = frame[:0]
self.index = self.dummy.index

self.blocks = [b.values for b in self.dummy._mgr.blocks]

for x in self.blocks:
util.set_array_not_contiguous(x)

self.nblocks = len(self.blocks)
# See the comment in indexes/base.py about _index_data.
# We need this for EA-backed indexes that have a reference to a 1-d
# ndarray like datetime / timedelta / period.
self.idx_slider = Slider(
self.frame.index._index_data, self.dummy.index._index_data)

self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks))
for i, block in enumerate(self.blocks):
self.base_ptrs[i] = (<ndarray>block).data

def __dealloc__(self):
free(self.base_ptrs)

cdef move(self, int start, int end):
cdef:
ndarray arr
Py_ssize_t i

# move blocks
for i in range(self.nblocks):
arr = self.blocks[i]

# axis=1 is the frame's axis=0
arr.data = self.base_ptrs[i] + arr.strides[1] * start
arr.shape[1] = end - start

# move and set the index
self.idx_slider.move(start, end)

object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
self.index._engine.clear_mapping()
self.index._cache.clear() # e.g. inferred_freq must go

cdef reset(self):
cdef:
ndarray arr
Py_ssize_t i

# reset blocks
for i in range(self.nblocks):
arr = self.blocks[i]

# axis=1 is the frame's axis=0
arr.data = self.base_ptrs[i]
arr.shape[1] = 0
return results, mutated
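
The net effect of this commit on apply_frame_axis0: each group is now materialized with an ordinary positional slice of the frame instead of sliding raw block pointers through the removed BlockSlider/Slider machinery. A minimal Python sketch of the new per-group loop, under the assumption that starts/ends are positional group boundaries (the frame, function, and names below are illustrative; the item-cache clearing, InvalidApply wrapping, and deep copying from the Cython code above are omitted):

import pandas as pd

def apply_frame_axis0_sketch(frame, f, names, starts, ends):
    results = []
    mutated = False
    for name, start, end in zip(names, starts, ends):
        chunk = frame[start:end]                 # positional row slice per group
        object.__setattr__(chunk, 'name', name)  # expose the group label to f
        piece = f(chunk)
        try:
            if piece.index is not chunk.index:
                mutated = True                   # f built a new object / new index
        except AttributeError:
            pass                                 # piece may be a scalar
        results.append(piece)
    return results, mutated

df = pd.DataFrame({'a': [1, 2, 3, 4]})
apply_frame_axis0_sketch(df, lambda x: x.sum(), ['g1', 'g2'], [0, 2], [2, 4])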
