Skip to content

Commit

Permalink
BUG: isin incorrectly casting ints to datetimes (#37528)
Browse files Browse the repository at this point in the history
* BUG: isin incorrectly casting ints to datetimes

* GH ref

* add asvs
  • Loading branch information
jbrockmendel authored Nov 22, 2020
1 parent 1e21130 commit 0823ed4
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 1 deletion.
24 changes: 23 additions & 1 deletion asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from pandas import NaT, Series, date_range
from pandas import Categorical, NaT, Series, date_range

from .pandas_vb_common import tm

Expand Down Expand Up @@ -36,6 +36,28 @@ def time_isin(self, dtypes):
self.s.isin(self.values)


class IsInDatetime64:
    # Benchmarks for Series.isin where the Series is built from a
    # DatetimeIndex and the lookup values vary in type.

    def setup(self):
        # Timestamps every 50 seconds from 2015-10-26 through 2016-01-01.
        dti = date_range(
            start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s"
        )
        self.ser = Series(dti)
        # Every third element of the Series' backing array.
        self.subset = self.ser._values[::3]
        # The same lookup values wrapped in a Categorical.
        self.cat_subset = Categorical(self.subset)

    def time_isin(self):
        # Lookup values taken from the Series' own backing array.
        self.ser.isin(self.subset)

    def time_isin_cat_values(self):
        # Lookup values supplied as a Categorical.
        self.ser.isin(self.cat_subset)

    def time_isin_mismatched_dtype(self):
        # Integer lookup values against a datetime Series.
        self.ser.isin([1, 2])

    def time_isin_empty(self):
        # Empty lookup list.
        self.ser.isin([])


class IsInFloat64:
def setup(self):
self.small = Series([1, 2], dtype=np.float64)
Expand Down
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,9 @@ Datetimelike
- Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`)
- Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`)
- Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`)
- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`DatetimeIndex.isin` incorrectly casting integers to datetimes (:issue:`36621`)
- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`)
- Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`)

Timedelta
^^^^^^^^^
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,12 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
# handle categoricals
return cast("Categorical", comps).isin(values)

if needs_i8_conversion(comps):
# Dispatch to DatetimeLikeIndexMixin.isin
from pandas import Index

return Index(comps).isin(values)

comps, dtype = _ensure_data(comps)
values, _ = _ensure_data(values, dtype=dtype)

Expand Down
28 changes: 28 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,12 +521,40 @@ def isin(self, values, level=None):
if level is not None:
self._validate_index_level(level)

if not hasattr(values, "dtype"):
values = np.asarray(values)

if values.dtype.kind in ["f", "i", "u", "c"]:
# TODO: de-duplicate with equals, validate_comparison_value
return np.zeros(self.shape, dtype=bool)

if not isinstance(values, type(self)):
inferrable = [
"timedelta",
"timedelta64",
"datetime",
"datetime64",
"date",
"period",
]
if values.dtype == object:
inferred = lib.infer_dtype(values, skipna=False)
if inferred not in inferrable:
if "mixed" in inferred:
return self.astype(object).isin(values)
return np.zeros(self.shape, dtype=bool)

try:
values = type(self)(values)
except ValueError:
return self.astype(object).isin(values)

try:
self._data._check_compatible_with(values)
except (TypeError, ValueError):
# Includes tzawareness mismatch and IncompatibleFrequencyError
return np.zeros(self.shape, dtype=bool)

return algorithms.isin(self.asi8, values.asi8)

def shift(self, periods=1, freq=None):
Expand Down
55 changes: 55 additions & 0 deletions pandas/tests/series/methods/test_isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
from pandas import Series, date_range
import pandas._testing as tm
from pandas.core.arrays import PeriodArray


class TestSeriesIsIn:
Expand Down Expand Up @@ -90,6 +91,60 @@ def test_isin_read_only(self):
expected = Series([True, True, True])
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("dtype", [object, None])
def test_isin_dt64_values_vs_ints(self, dtype):
    # GH#36621 don't cast integers to datetimes for isin
    dti = date_range("2013-01-01", "2013-01-05")
    ser = Series(dti)

    # 1356998400000000000 is 2013-01-01 expressed as nanoseconds since the
    # epoch, i.e. the i8 value of dti[0]; as an integer it must not match.
    comps = np.asarray([1356998400000000000], dtype=dtype)

    # No element may be reported as present.
    expected = np.zeros(len(dti), dtype=bool)

    # Index method
    res = dti.isin(comps)
    tm.assert_numpy_array_equal(res, expected)

    # Series method
    res = ser.isin(comps)
    tm.assert_series_equal(res, Series(expected))

    # Module-level function
    res = pd.core.algorithms.isin(ser, comps)
    tm.assert_numpy_array_equal(res, expected)

def test_isin_tzawareness_mismatch(self):
dti = date_range("2013-01-01", "2013-01-05")
ser = Series(dti)

other = dti.tz_localize("UTC")

res = dti.isin(other)
expected = np.array([False] * len(dti), dtype=bool)
tm.assert_numpy_array_equal(res, expected)

res = ser.isin(other)
tm.assert_series_equal(res, Series(expected))

res = pd.core.algorithms.isin(ser, other)
tm.assert_numpy_array_equal(res, expected)

def test_isin_period_freq_mismatch(self):
dti = date_range("2013-01-01", "2013-01-05")
pi = dti.to_period("M")
ser = Series(pi)

# We construct another PeriodIndex with the same i8 values
# but different dtype
dtype = dti.to_period("Y").dtype
other = PeriodArray._simple_new(pi.asi8, dtype=dtype)

res = pi.isin(other)
expected = np.array([False] * len(pi), dtype=bool)
tm.assert_numpy_array_equal(res, expected)

res = ser.isin(other)
tm.assert_series_equal(res, Series(expected))

res = pd.core.algorithms.isin(ser, other)
tm.assert_numpy_array_equal(res, expected)


@pytest.mark.slow
def test_isin_large_series_mixed_dtypes_and_nan():
Expand Down

0 comments on commit 0823ed4

Please sign in to comment.