diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 84eb3b3f15780..493e5b53a5559 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -670,6 +670,7 @@ Missing - Bug in :meth:`.SeriesGroupBy.transform` now correctly handles missing values for ``dropna=False`` (:issue:`35014`) - Bug in :meth:`Series.nunique` with ``dropna=True`` was returning incorrect results when both ``NA`` and ``None`` missing values were present (:issue:`37566`) +- Bug in :meth:`Series.interpolate` where kwargs ``limit_area`` and ``limit_direction`` had no effect when using methods ``pad`` and ``backfill`` (:issue:`31048`) - MultiIndex diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d912c908815f8..1c08888aa85fd 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1261,6 +1261,7 @@ def interpolate( axis=axis, inplace=inplace, limit=limit, + limit_area=limit_area, downcast=downcast, ) # validate the interp method @@ -1287,6 +1288,7 @@ def _interpolate_with_fill( axis: int = 0, inplace: bool = False, limit: Optional[int] = None, + limit_area: Optional[str] = None, downcast: Optional[str] = None, ) -> List["Block"]: """ fillna but using the interpolate machinery """ @@ -1301,6 +1303,7 @@ def _interpolate_with_fill( method=method, axis=axis, limit=limit, + limit_area=limit_area, ) blocks = [self.make_block_same_class(values, ndim=self.ndim)] diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 0afffbc1460e0..e374ba435a0bd 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,13 +1,13 @@ """ Routines for filling missing data. 
""" - +from functools import partial from typing import Any, List, Optional, Set, Union import numpy as np from pandas._libs import algos, lib -from pandas._typing import ArrayLike, DtypeObj +from pandas._typing import ArrayLike, Axis, DtypeObj from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -528,16 +528,92 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat return P(x) +def _interpolate_with_limit_area( + values: ArrayLike, method: str, limit: Optional[int], limit_area: Optional[str] +) -> ArrayLike: + """ + Apply interpolation and limit_area logic to values along a to-be-specified axis. + + Parameters + ---------- + values: array-like + Input array. + method: str + Interpolation method. Could be "bfill" or "pad" + limit: int, optional + Index limit on interpolation. + limit_area: str + Limit area for interpolation. Can be "inside" or "outside" + + Returns + ------- + values: array-like + Interpolated array. + """ + + invalid = isna(values) + + if not invalid.all(): + first = find_valid_index(values, "first") + last = find_valid_index(values, "last") + + values = interpolate_2d( + values, + method=method, + limit=limit, + ) + + if limit_area == "inside": + invalid[first : last + 1] = False + elif limit_area == "outside": + invalid[:first] = invalid[last + 1 :] = False + + values[invalid] = np.nan + + return values + + def interpolate_2d( values, - method="pad", - axis=0, - limit=None, + method: str = "pad", + axis: Axis = 0, + limit: Optional[int] = None, + limit_area: Optional[str] = None, ): """ Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. + + Parameters + ---------- + values: array-like + Input array. + method: str, default "pad" + Interpolation method. Could be "bfill" or "pad" + axis: 0 or 1 + Interpolation axis + limit: int, optional + Index limit on interpolation. 
+ limit_area: str, optional + Limit area for interpolation. Can be "inside" or "outside" + + Returns + ------- + values: array-like + Interpolated array. """ + if limit_area is not None: + return np.apply_along_axis( + partial( + _interpolate_with_limit_area, + method=method, + limit=limit, + limit_area=limit_area, + ), + axis, + values, + ) + orig_values = values transf = (lambda x: x) if axis == 0 else (lambda x: x.T) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 7c64d10675edd..8740a309eec13 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -458,6 +458,82 @@ def test_interp_limit_direction_raises(self, method, limit_direction, expected): with pytest.raises(ValueError, match=msg): s.interpolate(method=method, limit_direction=limit_direction) + @pytest.mark.parametrize( + "data, expected_data, kwargs", + ( + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan], + {"method": "pad", "limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "pad", "limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0], + {"method": "pad", "limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan], + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), + ( + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), + ( + range(5), + range(5), + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), 
+ ), + ) + def test_interp_limit_area_with_pad(self, data, expected_data, kwargs): + # GH26796 + + s = Series(data) + expected = Series(expected_data) + result = s.interpolate(**kwargs) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data, expected_data, kwargs", + ( + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "outside", "limit": 1}, + ), + ), + ) + def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs): + # GH26796 + + s = Series(data) + expected = Series(expected_data) + result = s.interpolate(**kwargs) + tm.assert_series_equal(result, expected) + def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11])