Skip to content

Commit

Permalink
ENH: [Draft] Fix issue #35131 Identify zero-dimensional duck arrays a…
Browse files Browse the repository at this point in the history
…s non-iterable (#44626)
  • Loading branch information
burnpanck authored Dec 20, 2021
1 parent 2ab1d1f commit d228a78
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ Other enhancements
- :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`)
- :meth:`read_json` can now parse unsigned long long integers (:issue:`26068`)
- :meth:`DataFrame.take` now raises a ``TypeError`` when passed a scalar for the indexer (:issue:`42875`)
- :func:`is_list_like` now identifies duck-arrays as list-like unless ``.ndim == 0``, i.e. zero-dimensional duck-arrays are no longer treated as list-like (:issue:`35131`)
- :class:`ExtensionDtype` and :class:`ExtensionArray` are now (de)serialized when exporting a :class:`DataFrame` with :meth:`DataFrame.to_json` using ``orient='table'`` (:issue:`20612`, :issue:`44705`).
-

Expand Down
11 changes: 9 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1098,13 +1098,20 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool:


cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
    # Decide whether ``obj`` counts as list-like.
    #
    # obj        : any Python object
    # allow_sets : when False, objects that are ``abc.Set`` instances are
    #              rejected
    #
    # Returns True for iterable, non-string, non-class objects that are not
    # zero-dimensional (for both numpy arrays and duck arrays exposing
    # ``ndim``).

    # Fast paths for the most frequent inputs.
    if util.is_array(obj):
        # a zero-dimensional numpy array is effectively a scalar
        return not cnp.PyArray_IsZeroDim(obj)
    if isinstance(obj, list):
        return True

    # Generic path: every check below is a reason to reject ``obj``.

    # equiv: `not isinstance(obj, abc.Iterable)`; classes themselves are
    # rejected even when they define ``__iter__``
    if getattr(obj, "__iter__", None) is None or isinstance(obj, type):
        return False

    # strings/unicode/bytes are iterable but are not counted as list-like
    if isinstance(obj, (str, bytes)):
        return False

    # a zero-dimensional duck array is effectively a scalar
    if hasattr(obj, "ndim") and obj.ndim == 0:
        return False

    # sets are only accepted when the caller allows them
    if allow_sets is False and isinstance(obj, abc.Set):
        return False

    return True
Expand Down
71 changes: 68 additions & 3 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,56 @@ def coerce(request):
return request.param


class MockNumpyLikeArray:
    """
    A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy.

    The key is that it is not actually a numpy array so
    ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other
    important properties are that the class defines a :meth:`__iter__` method
    (so that ``isinstance(obj, abc.Iterable)`` returns ``True``) and has an
    :attr:`ndim` property, as pandas special-cases 0-dimensional arrays in some
    cases.

    We expect pandas to behave with respect to such duck arrays exactly as
    with real numpy arrays. In particular, a 0-dimensional duck array is *NOT*
    a scalar (`is_scalar(np.array(1)) == False`), but it is not list-like either.

    Parameters
    ----------
    values : np.ndarray
        The wrapped array; ``ndim``, ``dtype``, ``size`` and ``shape`` are
        forwarded to it unchanged.
    """

    def __init__(self, values):
        self._values = values

    def __iter__(self):
        # Call ``iter`` eagerly, *before* the generator is created, so that
        # any error from the wrapped object (numpy raises ``TypeError`` when
        # iterating a 0-d array) surfaces at ``iter(obj)`` time rather than
        # on the first ``next``.
        iter_values = iter(self._values)

        def it_outer():
            yield from iter_values

        return it_outer()

    def __len__(self):
        return len(self._values)

    def __array__(self, t=None, copy=None):
        """
        Return the wrapped values as a real numpy array.

        Parameters
        ----------
        t : dtype, optional
            Requested dtype; ``None`` keeps the dtype of the wrapped values.
        copy : bool, optional
            Accepted for compatibility with the NumPy 2 ``__array__``
            protocol. ``True`` forces a copy; ``None``/``False`` copy only
            when the dtype conversion requires it (``copy=False`` is not
            enforced strictly).
        """
        if copy:
            return np.array(self._values, dtype=t, copy=True)
        return np.asarray(self._values, dtype=t)

    @property
    def ndim(self):
        return self._values.ndim

    @property
    def dtype(self):
        return self._values.dtype

    @property
    def size(self):
        return self._values.size

    @property
    def shape(self):
        return self._values.shape


# collect all objects to be tested for list-like-ness; use tuples of objects,
# whether they are list-like or not (special casing for sets), and their ID
ll_params = [
Expand Down Expand Up @@ -109,6 +159,15 @@ def coerce(request):
(np.ndarray((2,) * 4), True, "ndarray-4d"),
(np.array([[[[]]]]), True, "ndarray-4d-empty"),
(np.array(2), False, "ndarray-0d"),
(MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
(MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
(MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
(MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
(MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
(MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
(1, False, "int"),
(b"123", False, "bytes"),
(b"", False, "bytes-empty"),
Expand Down Expand Up @@ -181,6 +240,8 @@ def test_is_array_like():
assert inference.is_array_like(Series([1, 2]))
assert inference.is_array_like(np.array(["a", "b"]))
assert inference.is_array_like(Index(["2016-01-01"]))
assert inference.is_array_like(np.array([2, 3]))
assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3])))

class DtypeList(list):
dtype = "special"
Expand Down Expand Up @@ -1811,9 +1872,13 @@ def test_is_scalar_numpy_zerodim_arrays(self):

@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
def test_is_scalar_numpy_arrays(self):
    """Neither real numpy arrays nor duck arrays wrapping them are scalars."""
    # NOTE(review): the PendingDeprecationWarning filter is presumably needed
    # for the ``np.matrix`` case below -- confirm before removing it.
    for a in [
        np.array([]),
        np.array([[]]),
        np.matrix("1; 2"),
    ]:
        # the bare array and its duck-array wrapper must agree
        assert not is_scalar(a)
        assert not is_scalar(MockNumpyLikeArray(a))

def test_is_scalar_pandas_scalars(self):
assert is_scalar(Timestamp("2014-01-01"))
Expand Down

0 comments on commit d228a78

Please sign in to comment.