diff --git a/pandas/_libs/lib.pxd b/pandas/_libs/lib.pxd deleted file mode 100644 index b06c071c358c1..0000000000000 --- a/pandas/_libs/lib.pxd +++ /dev/null @@ -1,3 +0,0 @@ -# prototypes for sharing - -cpdef bint is_period(val) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e1d59f807a7fd..30521760327b4 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -21,11 +21,9 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyBytes_Check, PyUnicode_Check, PyTuple_New, + Py_EQ, PyObject_RichCompareBool) -cimport cpython - - from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, PyDateTime_IMPORT) @@ -105,6 +103,14 @@ def item_from_zerodim(object val): """ If the value is a zerodim array, return the item it contains. + Parameters + ---------- + val : object + + Returns + ------- + result : object + Examples -------- >>> item_from_zerodim(1) @@ -117,7 +123,9 @@ def item_from_zerodim(object val): array([1]) """ - return util.unbox_if_zerodim(val) + if cnp.PyArray_IsZeroDim(val): + return cnp.PyArray_ToScalar(cnp.PyArray_DATA(val), val) + return val @cython.wraparound(False) @@ -405,72 +413,6 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): return slice(start, end) -@cython.wraparound(False) -@cython.boundscheck(False) -def scalar_compare(ndarray[object] values, object val, object op): - cdef: - Py_ssize_t i, n = len(values) - ndarray[uint8_t, cast=True] result - bint isnull_val - int flag - object x - - if op is operator.lt: - flag = cpython.Py_LT - elif op is operator.le: - flag = cpython.Py_LE - elif op is operator.gt: - flag = cpython.Py_GT - elif op is operator.ge: - flag = cpython.Py_GE - elif op is operator.eq: - flag = cpython.Py_EQ - elif op is operator.ne: - flag = cpython.Py_NE - else: - raise ValueError('Unrecognized operator') - - result = np.empty(n, dtype=bool).view(np.uint8) - isnull_val = checknull(val) - - if flag == cpython.Py_NE: - for i in range(n): - x = values[i] - if checknull(x): - result[i] = True - elif isnull_val: - result[i] = True - else: - try: - result[i] = PyObject_RichCompareBool(x, val, flag) - except (TypeError): - result[i] = True - elif flag == cpython.Py_EQ: - for i in range(n): - x = values[i] - if checknull(x): - result[i] = False - elif isnull_val: - result[i] = False - else: - try: - result[i] = PyObject_RichCompareBool(x, val, flag) - except (TypeError): - result[i] = False - - else: - for i in range(n): - x = values[i] - if checknull(x): - result[i] = False - elif isnull_val: - result[i] = False - else: - result[i] = PyObject_RichCompareBool(x, val, flag) - - return result.view(bool) - - @cython.wraparound(False) @cython.boundscheck(False) cpdef bint array_equivalent_object(object[:] left, object[:] right): @@ -486,115 +428,12 @@ cpdef bint array_equivalent_object(object[:] left, object[:] right): # we are either not equal or both nan # I think None == None will be true here - if not (PyObject_RichCompareBool(x, y, cpython.Py_EQ) or + if not (PyObject_RichCompareBool(x, y, Py_EQ) or _checknull(x) and _checknull(y)): return False return True -@cython.wraparound(False) -@cython.boundscheck(False) -def vec_compare(ndarray[object] left, ndarray[object] right, object op): - cdef: - Py_ssize_t i, n = len(left) - ndarray[uint8_t, cast=True] result - int flag - - if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) - - if op is operator.lt: - flag = cpython.Py_LT - elif op is operator.le: - flag = cpython.Py_LE - elif op is operator.gt: - flag = cpython.Py_GT - elif op is operator.ge: - flag = cpython.Py_GE - elif op is operator.eq: - flag = cpython.Py_EQ - elif op is operator.ne: - flag = cpython.Py_NE - else: - raise ValueError('Unrecognized operator') - - result = np.empty(n, dtype=bool).view(np.uint8) - - if flag == cpython.Py_NE: - for i in range(n): - x = left[i] - y = right[i] - - if checknull(x) or checknull(y): - result[i] = True - else: - result[i] = PyObject_RichCompareBool(x, y, flag) - else: - for i in range(n): - x = left[i] - y = right[i] - - if checknull(x) or checknull(y): - result[i] = False - else: - result[i] = PyObject_RichCompareBool(x, y, flag) - - return result.view(bool) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def scalar_binop(ndarray[object] values, object val, object op): - cdef: - Py_ssize_t i, n = len(values) - ndarray[object] result - object x - - result = np.empty(n, dtype=object) - if _checknull(val): - result.fill(val) - return result - - for i in range(n): - x = values[i] - if _checknull(x): - result[i] = x - else: - result[i] = op(x, val) - - return maybe_convert_bool(result) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def vec_binop(ndarray[object] left, ndarray[object] right, object op): - cdef: - Py_ssize_t i, n = len(left) - ndarray[object] result - - if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) - - result = np.empty(n, dtype=object) - - for i in range(n): - x = left[i] - y = right[i] - try: - result[i] = op(x, y) - except TypeError: - if _checknull(x): - result[i] = x - elif _checknull(y): - result[i] = y - else: - raise - - return maybe_convert_bool(result) - - def astype_intsafe(ndarray[object] arr, new_dtype): cdef: Py_ssize_t i, n = len(arr) diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx new file mode 100644 index 0000000000000..148018ece20e2 --- /dev/null +++ b/pandas/_libs/ops.pyx @@ -0,0 +1,296 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +import operator + +from cpython cimport (PyFloat_Check, PyBool_Check, + PyObject_RichCompareBool, + Py_EQ, Py_NE, Py_LT, Py_LE, Py_GT, Py_GE) + +cimport cython +from cython cimport Py_ssize_t + +import numpy as np +from numpy cimport ndarray, uint8_t + + +from util cimport UINT8_MAX, _checknull + +from missing cimport checknull + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_compare(ndarray[object] values, object val, object op): + """ + Compare each element of `values` array with the scalar `val`, with + the comparison operation described by `op`. + + Parameters + ---------- + values : ndarray[object] + val : object + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(values) + ndarray[uint8_t, cast=True] result + bint isnull_val + int flag + object x + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + isnull_val = checknull(val) + + if flag == Py_NE: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = True + elif isnull_val: + result[i] = True + else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except (TypeError): + result[i] = True + elif flag == Py_EQ: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except (TypeError): + result[i] = False + + else: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, val, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_compare(ndarray[object] left, ndarray[object] right, object op): + """ + Compare the elements of `left` with the elements of `right` pointwise, + with the comparison operation described by `op`. + + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(left) + ndarray[uint8_t, cast=True] result + int flag + + if n != len(right): + raise ValueError('Arrays were different lengths: %d vs %d' + % (n, len(right))) + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + + if flag == Py_NE: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = True + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + else: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_binop(ndarray[object] values, object val, object op): + """ + Apply the given binary operator `op` between each element of the array + `values` and the scalar `val`. + + Parameters + ---------- + values : ndarray[object] + val : object + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + ndarray[object] result + object x + + result = np.empty(n, dtype=object) + if _checknull(val): + result.fill(val) + return result + + for i in range(n): + x = values[i] + if _checknull(x): + result[i] = x + else: + result[i] = op(x, val) + + return maybe_convert_bool(result) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_binop(ndarray[object] left, ndarray[object] right, object op): + """ + Apply the given binary operator `op` pointwise to the elements of + arrays `left` and `right`. + + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(left) + ndarray[object] result + + if n != len(right): + raise ValueError('Arrays were different lengths: %d vs %d' + % (n, len(right))) + + result = np.empty(n, dtype=object) + + for i in range(n): + x = left[i] + y = right[i] + try: + result[i] = op(x, y) + except TypeError: + if _checknull(x): + result[i] = x + elif _checknull(y): + result[i] = y + else: + raise + + return maybe_convert_bool(result) + + +def maybe_convert_bool(ndarray[object] arr, + true_values=None, false_values=None): + cdef: + Py_ssize_t i, n + ndarray[uint8_t] result + object val + set true_vals, false_vals + int na_count = 0 + + n = len(arr) + result = np.empty(n, dtype=np.uint8) + + # the defaults + true_vals = set(('True', 'TRUE', 'true')) + false_vals = set(('False', 'FALSE', 'false')) + + if true_values is not None: + true_vals = true_vals | set(true_values) + + if false_values is not None: + false_vals = false_vals | set(false_values) + + for i from 0 <= i < n: + val = arr[i] + + if PyBool_Check(val): + if val is True: + result[i] = 1 + else: + result[i] = 0 + elif val in true_vals: + result[i] = 1 + elif val in false_vals: + result[i] = 0 + elif PyFloat_Check(val): + result[i] = UINT8_MAX + na_count += 1 + else: + return arr + + if na_count > 0: + mask = result == UINT8_MAX + arr = result.view(np.bool_).astype(object) + np.putmask(arr, mask, np.nan) + return arr + else: + return result.view(np.bool_) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 75bff34e4a391..1fa07dbed6822 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -752,7 +752,7 @@ cdef class IntegerFloatValidator(Validator): return issubclass(self.dtype.type, np.integer) -cpdef bint is_integer_float_array(ndarray values): +cdef bint is_integer_float_array(ndarray values): cdef: IntegerFloatValidator validator = IntegerFloatValidator( len(values), @@ -803,7 +803,7 @@ cdef class UnicodeValidator(Validator): return issubclass(self.dtype.type, np.unicode_) -cpdef bint is_unicode_array(ndarray values, bint skipna=False): +cdef bint is_unicode_array(ndarray values, bint skipna=False): cdef: UnicodeValidator validator = UnicodeValidator( len(values), @@ -822,7 +822,7 @@ cdef class BytesValidator(Validator): return issubclass(self.dtype.type, np.bytes_) -cpdef bint is_bytes_array(ndarray values, bint skipna=False): +cdef bint is_bytes_array(ndarray values, bint skipna=False): cdef: BytesValidator validator = BytesValidator( len(values), @@ -1090,7 +1090,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, cdef: int status, maybe_int Py_ssize_t i, n = values.size - Seen seen = Seen(coerce_numeric); + Seen seen = Seen(coerce_numeric) ndarray[float64_t] floats = np.empty(n, dtype='f8') ndarray[complex128_t] complexes = np.empty(n, dtype='c16') ndarray[int64_t] ints = np.empty(n, dtype='i8') @@ -1224,7 +1224,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, ndarray[uint8_t] bools ndarray[int64_t] idatetimes ndarray[int64_t] itimedeltas - Seen seen = Seen(); + Seen seen = Seen() object val, onan float64_t fval, fnan @@ -1405,55 +1405,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return objects -def maybe_convert_bool(ndarray[object] arr, - true_values=None, false_values=None): - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - object val - set true_vals, false_vals - int na_count = 0 - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - - # the defaults - true_vals = set(('True', 'TRUE', 'true')) - false_vals = set(('False', 'FALSE', 'false')) - - if true_values is not None: - true_vals = true_vals | set(true_values) - - if false_values is not None: - false_vals = false_vals | set(false_values) - - for i from 0 <= i < n: - val = arr[i] - - if cpython.PyBool_Check(val): - if val is True: - result[i] = 1 - else: - result[i] = 0 - elif val in true_vals: - result[i] = 1 - elif val in false_vals: - result[i] = 0 - elif PyFloat_Check(val): - result[i] = UINT8_MAX - na_count += 1 - else: - return arr - - if na_count > 0: - mask = result == UINT8_MAX - arr = result.view(np.bool_).astype(object) - np.putmask(arr, mask, np.nan) - return arr - else: - return result.view(np.bool_) - - def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, bint convert=1): """ diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index cf23df1279f34..5030b742849f8 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -164,22 +164,3 @@ cdef inline bint _checknan(object val): cdef inline bint is_period_object(object val): return getattr(val, '_typ', '_typ') == 'period' - - -cdef inline object unbox_if_zerodim(object arr): - """ - If arr is zerodim array, return a proper array scalar (e.g. np.int64). - Otherwise, return arr as is. - - Parameters - ---------- - arr : object - - Returns - ------- - result : object - """ - if cnp.PyArray_IsZeroDim(arr): - return cnp.PyArray_ToScalar(cnp.PyArray_DATA(arr), arr) - else: - return arr diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index beaca1a8483c7..4726bd7ea3629 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -543,7 +543,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): ndarray[int64_t] trans, deltas int64_t delta, local_val Py_ssize_t posn - datetime dt assert obj.tzinfo is None @@ -679,7 +678,6 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): Py_ssize_t pos int64_t v, offset, utc_date pandas_datetimestruct dts - datetime dt # See GH#17734 We should always be converting either from UTC or to UTC assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC') @@ -739,7 +737,6 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): ndarray[Py_ssize_t] posn int64_t v, offset, delta pandas_datetimestruct dts - datetime dt if len(vals) == 0: return np.array([], dtype=np.int64) @@ -844,7 +841,6 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, pandas_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' - datetime dt # Vectorized version of DstTzInfo.localize @@ -1086,7 +1082,6 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz): ndarray[int64_t] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - datetime dt if is_utc(tz): with nogil: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index b20f208d14dc5..f75e3d7df8237 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -11,7 +11,7 @@ import pandas as pd from pandas._libs import (lib, index as libindex, - algos as libalgos) + algos as libalgos, ops as libops) from pandas import compat from pandas.util._decorators import Appender @@ -1040,9 +1040,9 @@ def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, (ABCSeries, ABCIndex)): y = y.values - result = lib.vec_compare(x, y, op) + result = libops.vec_compare(x, y, op) else: - result = lib.scalar_compare(x, y, op) + result = libops.scalar_compare(x, y, op) return result @@ -1205,13 +1205,13 @@ def na_op(x, y): else: x = _ensure_object(x) y = _ensure_object(y) - result = lib.vec_binop(x, y, op) + result = libops.vec_binop(x, y, op) else: # let null fall thru if not isna(y): y = bool(y) try: - result = lib.scalar_binop(x, y, op) + result = libops.scalar_binop(x, y, op) except: raise TypeError("cannot compare a dtyped [{dtype}] array " "with a scalar of type [{typ}]" diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b1c1ede66236c..ce688f8b16fe5 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -20,6 +20,7 @@ from pandas.util._decorators import Appender import re import pandas._libs.lib as lib +import pandas._libs.ops as libops import warnings import textwrap import codecs @@ -461,7 +462,7 @@ def rep(x, r): return compat.text_type.__mul__(x, r) repeats = np.asarray(repeats, dtype=object) - result = lib.vec_binop(com._values_from_object(arr), repeats, rep) + result = libops.vec_binop(com._values_from_object(arr), repeats, rep) return result diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4b1385514a0c4..469cd6d82e4b4 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -43,6 +43,7 @@ import pandas._libs.lib as lib import pandas._libs.parsers as parsers +import pandas._libs.ops as libops from pandas._libs.tslibs import parsing # BOM character (byte order mark) @@ -1616,9 +1617,9 @@ def _infer_types(self, values, na_values, try_num_bool=True): na_count = parsers.sanitize_objects(values, na_values, False) if result.dtype == np.object_ and try_num_bool: - result = lib.maybe_convert_bool(values, - true_values=self.true_values, - false_values=self.false_values) + result = libops.maybe_convert_bool(values, + true_values=self.true_values, + false_values=self.false_values) return result, na_count diff --git a/setup.py b/setup.py index c7784260d79ca..7fb5358d0950b 100755 --- a/setup.py +++ b/setup.py @@ -313,6 +313,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/testing.pyx', 'pandas/_libs/skiplist.pyx', 'pandas/_libs/sparse.pyx', + 'pandas/_libs/ops.pyx', 'pandas/_libs/parsers.pyx', 'pandas/_libs/tslibs/ccalendar.pyx', 'pandas/_libs/tslibs/period.pyx', @@ -525,6 +526,10 @@ def pxd(name): '_libs.reduction': { 'pyxfile': '_libs/reduction', 'pxdfiles': ['_libs/src/util']}, + '_libs.ops': { + 'pyxfile': '_libs/ops', + 'pxdfiles': ['_libs/src/util', + '_libs/missing']}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', 'pxdfiles': ['_libs/src/util',