Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Refactor cython to use memory views #24932

Merged
merged 3 commits into from
Jan 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class NegInfinity(object):

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does `const` provide any performance enhancements?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, if a user happens to pass an ndarray with the writeable flag set to False, Cython will raise an error if the `const` modifier is not present.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, thanks.

"""
Efficiently find the unique first-differences of the given array.

Expand Down Expand Up @@ -150,7 +150,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:

@cython.boundscheck(False)
@cython.wraparound(False)
def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
"""
compute a 1-d indexer that is an ordering of the passed index,
ordered by the groups. This is a reverse of the label
Expand Down Expand Up @@ -230,7 +230,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:

@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):
cdef:
Py_ssize_t i, j, xi, yi, N, K
bint minpv
Expand Down Expand Up @@ -294,7 +294,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):

@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
cdef:
Py_ssize_t i, j, xi, yi, N, K
ndarray[float64_t, ndim=2] result
Expand Down Expand Up @@ -435,8 +435,8 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
def pad_inplace(algos_t[:] values,
const uint8_t[:] mask,
limit=None):
cdef:
Py_ssize_t i, N
Expand Down Expand Up @@ -472,8 +472,8 @@ def pad_inplace(ndarray[algos_t] values,

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
def pad_2d_inplace(algos_t[:, :] values,
const uint8_t[:, :] mask,
limit=None):
cdef:
Py_ssize_t i, j, N, K
Expand Down Expand Up @@ -602,8 +602,8 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
def backfill_inplace(algos_t[:] values,
const uint8_t[:] mask,
limit=None):
cdef:
Py_ssize_t i, N
Expand Down Expand Up @@ -639,8 +639,8 @@ def backfill_inplace(ndarray[algos_t] values,

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
def backfill_2d_inplace(algos_t[:, :] values,
const uint8_t[:, :] mask,
limit=None):
cdef:
Py_ssize_t i, j, N, K
Expand Down Expand Up @@ -678,7 +678,7 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,

@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap(ndarray[algos_t] index, object func):
def arrmap(algos_t[:] index, object func):
jreback marked this conversation as resolved.
Show resolved Hide resolved
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
Expand Down
92 changes: 46 additions & 46 deletions pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def get_dispatch(dtypes):

@cython.wraparound(False)
@cython.boundscheck(False)
jreback marked this conversation as resolved.
Show resolved Hide resolved
def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_add_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -76,10 +76,10 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_prod_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -123,10 +123,10 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_var_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
Expand Down Expand Up @@ -175,10 +175,10 @@ def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_mean_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
Expand Down Expand Up @@ -220,11 +220,11 @@ def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
Py_ssize_t min_count=-1):
def group_ohlc_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -293,10 +293,10 @@ def get_dispatch(dtypes):

@cython.wraparound(False)
@cython.boundscheck(False)
def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_last_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
jreback marked this conversation as resolved.
Show resolved Hide resolved
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -350,10 +350,10 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels, int64_t rank,
def group_nth_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels, int64_t rank,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -411,9 +411,9 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_rank_{{name}}(float64_t[:, :] out,
{{c_type}}[:, :] values,
const int64_t[:] labels,
bint is_datetimelike, object ties_method,
bint ascending, bint pct, object na_option):
"""
Expand Down Expand Up @@ -606,10 +606,10 @@ ctypedef fused groupby_t:

@cython.wraparound(False)
@cython.boundscheck(False)
def group_max(ndarray[groupby_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_max(groupby_t[:, :] out,
int64_t[:] counts,
groupby_t[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -669,10 +669,10 @@ def group_max(ndarray[groupby_t, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_min(ndarray[groupby_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_min(groupby_t[:, :] out,
int64_t[:] counts,
groupby_t[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -731,9 +731,9 @@ def group_min(ndarray[groupby_t, ndim=2] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin(ndarray[groupby_t, ndim=2] out,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_cummin(groupby_t[:, :] out,
groupby_t[:, :] values,
const int64_t[:] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
Expand Down Expand Up @@ -779,9 +779,9 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax(ndarray[groupby_t, ndim=2] out,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_cummax(groupby_t[:, :] out,
groupby_t[:, :] values,
const int64_t[:] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
Expand Down
18 changes: 10 additions & 8 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ include "hashtable_class_helper.pxi"
include "hashtable_func_helper.pxi"

cdef class Factorizer:
cdef public PyObjectHashTable table
cdef public ObjectVector uniques
cdef public Py_ssize_t count
cdef public:
PyObjectHashTable table
ObjectVector uniques
Py_ssize_t count

def __init__(self, size_hint):
self.table = PyObjectHashTable(size_hint)
Expand Down Expand Up @@ -96,9 +97,10 @@ cdef class Factorizer:


cdef class Int64Factorizer:
cdef public Int64HashTable table
cdef public Int64Vector uniques
cdef public Py_ssize_t count
cdef public:
Int64HashTable table
Int64Vector uniques
Py_ssize_t count

def __init__(self, size_hint):
self.table = Int64HashTable(size_hint)
Expand Down Expand Up @@ -140,7 +142,7 @@ cdef class Int64Factorizer:

@cython.wraparound(False)
@cython.boundscheck(False)
def unique_label_indices(ndarray[int64_t, ndim=1] labels):
def unique_label_indices(const int64_t[:] labels):
"""
indices of the first occurrences of the unique labels
*excluding* -1. equivalent to:
Expand Down Expand Up @@ -168,6 +170,6 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
kh_destroy_int64(table)

arr = idx.to_array()
arr = arr[labels[arr].argsort()]
arr = arr[np.asarray(labels)[arr].argsort()]

return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ cdef class {{name}}HashTable(HashTable):
self.table.vals[k] = <Py_ssize_t>values[i]

@cython.boundscheck(False)
def map_locations(self, ndarray[{{dtype}}_t, ndim=1] values):
def map_locations(self, const {{dtype}}_t[:] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ from pandas._libs.algos import ensure_int64

cdef class BlockPlacement:
# __slots__ = '_as_slice', '_as_array', '_len'
cdef slice _as_slice
cdef object _as_array
cdef:
slice _as_slice
object _as_array

cdef bint _has_slice, _has_array, _is_known_slice_like
bint _has_slice, _has_array, _is_known_slice_like

def __init__(self, val):
cdef:
Expand Down
Loading