diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index b203c65b034c6..cdc78dc60ea01 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -28,6 +28,7 @@ Bug fixes ~~~~~~~~~ - Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`) - Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`) +- Bug in :class:`~arrays.ArrowExtensionArray` with duration dtype overflowing when constructed from data containing numpy ``NaT`` (:issue:`52843`) - Bug in :func:`Series.dt.round` when passing a ``freq`` of equal or higher resolution compared to the :class:`Series` would raise a ``ZeroDivisionError`` (:issue:`52761`) - Bug in :func:`Series.median` with :class:`ArrowDtype` returning an approximate median (:issue:`52679`) - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on categorical dtypes (:issue:`49889`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d83bf9d340993..51d6fa74ea94e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -258,7 +258,13 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal scalars = pa.array(scalars, from_pandas=True) if pa_dtype and scalars.type != pa_dtype: scalars = scalars.cast(pa_dtype) - return cls(scalars) + arr = cls(scalars) + if pa.types.is_duration(scalars.type) and scalars.null_count > 0: + # GH52843: upstream bug for duration types when originally + # constructed with data containing numpy NaT. + # https://github.com/apache/arrow/issues/35088 + arr = arr.fillna(arr.dtype.na_value) + return arr @classmethod def _from_sequence_of_strings( diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 9191560bd9a68..0300b271acc3f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2830,3 +2830,19 @@ def test_date32_repr(): arrow_dt = pa.array([date.fromisoformat("2020-01-01")], type=pa.date32()) ser = pd.Series(arrow_dt, dtype=ArrowDtype(arrow_dt.type)) assert repr(ser) == "0 2020-01-01\ndtype: date32[day][pyarrow]" + + +@pytest.mark.xfail( + pa_version_under8p0, + reason="Function 'add_checked' has no kernel matching input types", + raises=pa.ArrowNotImplementedError, +) +def test_duration_overflow_from_ndarray_containing_nat(): + # GH52843 + data_ts = pd.to_datetime([1, None]) + data_td = pd.to_timedelta([1, None]) + ser_ts = pd.Series(data_ts, dtype=ArrowDtype(pa.timestamp("ns"))) + ser_td = pd.Series(data_td, dtype=ArrowDtype(pa.duration("ns"))) + result = ser_ts + ser_td + expected = pd.Series([2, None], dtype=ArrowDtype(pa.timestamp("ns"))) + tm.assert_series_equal(result, expected)