diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 7768900a0f82e..7bd1b8f963726 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -28,6 +28,7 @@ Bug fixes ~~~~~~~~~ - Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`) - Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`) +- Bug in :class:`~arrays.ArrowExtensionArray` with duration dtype overflowing when constructed from data containing numpy ``NaT`` (:issue:`52843`) - Bug in :func:`Series.dt.round` when passing a ``freq`` of equal or higher resolution compared to the :class:`Series` would raise a ``ZeroDivisionError`` (:issue:`52761`) - Bug in :func:`Series.median` with :class:`ArrowDtype` returning an approximate median (:issue:`52679`) - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on categorical dtypes (:issue:`49889`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 5303a2447a5bf..dd8484050ef89 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -260,7 +260,13 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal scalars = pa.array(scalars, from_pandas=True) if pa_dtype: scalars = scalars.cast(pa_dtype) - return cls(scalars) + arr = cls(scalars) + if pa.types.is_duration(scalars.type) and scalars.null_count > 0: + # GH52843: upstream bug for duration types when originally + # constructed with data containing numpy NaT. + # https://github.com/apache/arrow/issues/35088 + arr = arr.fillna(arr.dtype.na_value) + return arr @classmethod def _from_sequence_of_strings( diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 095b892b3bc78..d557f5efc0a8a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2629,3 +2629,19 @@ def test_describe_numeric_data(pa_type): index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], ) tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail( + pa_version_under8p0, + reason="Function 'add_checked' has no kernel matching input types", + raises=pa.ArrowNotImplementedError, +) +def test_duration_overflow_from_ndarray_containing_nat(): + # GH52843 + data_ts = pd.to_datetime([1, None]) + data_td = pd.to_timedelta([1, None]) + ser_ts = pd.Series(data_ts, dtype=ArrowDtype(pa.timestamp("ns"))) + ser_td = pd.Series(data_td, dtype=ArrowDtype(pa.duration("ns"))) + result = ser_ts + ser_td + expected = pd.Series([2, None], dtype=ArrowDtype(pa.timestamp("ns"))) + tm.assert_series_equal(result, expected)