Skip to content

Commit

Permalink
BUG in Series.interpolate: limit_area/limit_direction kwargs with met…
Browse files Browse the repository at this point in the history
…hod="pad"/"bfill" have no effect (#38106)
  • Loading branch information
arw2019 authored Dec 1, 2020
1 parent 43928e6 commit 1829a61
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ Missing

- Bug in :meth:`.SeriesGroupBy.transform` now correctly handles missing values for ``dropna=False`` (:issue:`35014`)
- Bug in :meth:`Series.nunique` with ``dropna=True`` was returning incorrect results when both ``NA`` and ``None`` missing values were present (:issue:`37566`)
- Bug in :meth:`Series.interpolate` where the keyword arguments ``limit_area`` and ``limit_direction`` had no effect when using the methods ``pad`` and ``backfill`` (:issue:`31048`)
-

MultiIndex
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,7 @@ def interpolate(
axis=axis,
inplace=inplace,
limit=limit,
limit_area=limit_area,
downcast=downcast,
)
# validate the interp method
Expand All @@ -1287,6 +1288,7 @@ def _interpolate_with_fill(
axis: int = 0,
inplace: bool = False,
limit: Optional[int] = None,
limit_area: Optional[str] = None,
downcast: Optional[str] = None,
) -> List["Block"]:
""" fillna but using the interpolate machinery """
Expand All @@ -1301,6 +1303,7 @@ def _interpolate_with_fill(
method=method,
axis=axis,
limit=limit,
limit_area=limit_area,
)

blocks = [self.make_block_same_class(values, ndim=self.ndim)]
Expand Down
86 changes: 81 additions & 5 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""
Routines for filling missing data.
"""

from functools import partial
from typing import Any, List, Optional, Set, Union

import numpy as np

from pandas._libs import algos, lib
from pandas._typing import ArrayLike, DtypeObj
from pandas._typing import ArrayLike, Axis, DtypeObj
from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.cast import infer_dtype_from_array
Expand Down Expand Up @@ -528,16 +528,92 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
return P(x)


def _interpolate_with_limit_area(
    values: ArrayLike, method: str, limit: Optional[int], limit_area: Optional[str]
) -> ArrayLike:
    """
    Fill missing entries of ``values`` and then re-mask them per ``limit_area``.

    The fill itself is delegated to ``interpolate_2d``; afterwards, the
    originally-missing positions that ``limit_area`` does not allow to be
    filled are reset to NaN.

    Parameters
    ----------
    values : array-like
        Input array.
    method : str
        Fill method forwarded to ``interpolate_2d``. Could be "bfill" or "pad".
    limit : int, optional
        Index limit on interpolation, forwarded to ``interpolate_2d``.
    limit_area : str
        Limit area for interpolation. Can be "inside" or "outside".
        With "inside", only gaps between the first and last valid entries
        stay filled; with "outside", only the gaps before the first and after
        the last valid entry stay filled. Any other value resets every
        originally-missing position to NaN.

    Returns
    -------
    values : array-like
        Interpolated array. If every entry is missing, the input is returned
        untouched.
    """
    nan_mask = isna(values)

    # Nothing to anchor a fill on: hand the input back as-is.
    if nan_mask.all():
        return values

    # Locate the valid bounds *before* filling, since the fill changes
    # which positions are missing.
    first_valid = find_valid_index(values, "first")
    last_valid = find_valid_index(values, "last")

    values = interpolate_2d(values, method=method, limit=limit)

    # Clear the mask wherever the fill is allowed to stay; whatever remains
    # True in the mask is reverted to NaN below.
    if limit_area == "inside":
        nan_mask[first_valid : last_valid + 1] = False
    elif limit_area == "outside":
        nan_mask[:first_valid] = False
        nan_mask[last_valid + 1 :] = False

    values[nan_mask] = np.nan
    return values


def interpolate_2d(
values,
method="pad",
axis=0,
limit=None,
method: str = "pad",
axis: Axis = 0,
limit: Optional[int] = None,
limit_area: Optional[str] = None,
):
"""
Perform an actual interpolation of values. Values will be made 2-d if
needed; fills inplace and returns the result.
Parameters
----------
values: array-like
Input array.
method: str, default "pad"
Interpolation method. Could be "bfill" or "pad"
axis: 0 or 1
Interpolation axis
limit: int, optional
Index limit on interpolation.
limit_area: str, optional
Limit area for interpolation. Can be "inside" or "outside"
Returns
-------
values: array-like
Interpolated array.
"""
if limit_area is not None:
return np.apply_along_axis(
partial(
_interpolate_with_limit_area,
method=method,
limit=limit,
limit_area=limit_area,
),
axis,
values,
)

orig_values = values

transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
Expand Down
76 changes: 76 additions & 0 deletions pandas/tests/series/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,82 @@ def test_interp_limit_direction_raises(self, method, limit_direction, expected):
with pytest.raises(ValueError, match=msg):
s.interpolate(method=method, limit_direction=limit_direction)

@pytest.mark.parametrize(
    "data, expected_data, kwargs",
    [
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
            {"method": "pad", "limit_area": "inside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "pad", "limit_area": "inside", "limit": 1},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
            {"method": "pad", "limit_area": "outside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
        (
            [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
            [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
        (
            range(5),
            range(5),
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
    ],
)
def test_interp_limit_area_with_pad(self, data, expected_data, kwargs):
    # GH26796
    # Each case interpolates ``data`` with the given pad kwargs and checks
    # the result against ``expected_data``; the cases cover "inside" and
    # "outside" with and without ``limit``, an all-NaN input, and an input
    # with no missing values.
    result = Series(data).interpolate(**kwargs)
    tm.assert_series_equal(result, Series(expected_data))

@pytest.mark.parametrize(
    "data, expected_data, kwargs",
    [
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "inside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "inside", "limit": 1},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "outside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "outside", "limit": 1},
        ),
    ],
)
def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs):
    # GH26796
    # Each case interpolates ``data`` with the given bfill kwargs and checks
    # the result against ``expected_data``; the cases cover "inside" and
    # "outside", each with and without ``limit``.
    result = Series(data).interpolate(**kwargs)
    tm.assert_series_equal(result, Series(expected_data))

def test_interp_limit_direction(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
Expand Down

0 comments on commit 1829a61

Please sign in to comment.