From 13476585b1a4b7960679149f0ccd517187015063 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Sun, 4 Nov 2018 16:53:27 +0100 Subject: [PATCH] API: fix corner case of lib.infer_dtype (#23422) --- pandas/_libs/lib.pyx | 5 ++++- pandas/_libs/missing.pxd | 3 +++ pandas/_libs/missing.pyx | 2 +- pandas/tests/dtypes/test_inference.py | 16 ++++++++++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5b1d416179edbf..ce7aee163c2bd2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -59,7 +59,7 @@ from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone, tz_compare -from missing cimport (checknull, +from missing cimport (checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period) @@ -1216,6 +1216,9 @@ def infer_dtype(value: object, skipna: bool=False) -> str: values = construct_1d_object_array_from_listlike(value) values = getattr(values, 'values', values) + if skipna: + values = values[~isnaobj(values)] + val = _try_infer_map(values) if val is not None: return val diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index 9f660cc6785c8c..d0dd306680ae81 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -1,7 +1,10 @@ # -*- coding: utf-8 -*- +from numpy cimport ndarray, uint8_t + cpdef bint checknull(object val) cpdef bint checknull_old(object val) +cpdef ndarray[uint8_t] isnaobj(ndarray arr) cdef bint is_null_datetime64(v) cdef bint is_null_timedelta64(v) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 95ea103025b239..d6786a96871bd4 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -124,7 +124,7 @@ cdef inline bint _check_none_nan_inf_neginf(object val): @cython.wraparound(False) @cython.boundscheck(False) -def isnaobj(ndarray arr): +cpdef ndarray[uint8_t] isnaobj(ndarray arr): """ Return boolean mask denoting which elements of a 1-D array are na-like, according to the criteria defined in `_check_all_nulls`: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index f1535401994630..dcd03b242a47b6 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -592,6 +592,22 @@ def test_unicode(self): expected = 'unicode' if PY2 else 'string' assert result == expected + @pytest.mark.parametrize('dtype, missing, skipna, expected', [ + (float, np.nan, False, 'floating'), + (float, np.nan, True, 'floating'), + (object, np.nan, False, 'floating'), + (object, np.nan, True, 'empty'), + (object, None, False, 'mixed'), + (object, None, True, 'empty') + ]) + @pytest.mark.parametrize('box', [pd.Series, np.array]) + def test_object_empty(self, box, missing, dtype, skipna, expected): + # GH 23421 + arr = box([missing, missing], dtype=dtype) + + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected + def test_datetime(self): dates = [datetime(2012, 1, x) for x in range(1, 20)]