From 9afe992c7f12eeb8186b165ad9685889071ec472 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 17:02:22 +0100 Subject: [PATCH 01/37] Added failing test for https://github.com/pandas-dev/pandas/issues/26796 This test currently only test `limit_area`. For `limit_direction` the implementation should later raise an error, because `pad` and `bfill` both already define a direction. But let's now first do the implementation of the `limit_area` for `pad` and `bfill`. --- pandas/tests/series/test_missing.py | 55 +++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 6b7d9e00a5228..80d66919a7776 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1382,6 +1382,61 @@ def test_interp_limit_area(self): with pytest.raises(ValueError, match=msg): s.interpolate(method="linear", limit_area="abc") + def test_interp_limit_area_with_pad(self): + # Test for issue #26796 + s = Series( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series( + [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="pad", limit_area="inside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="pad", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) + result = s.interpolate(method="pad", limit_area="outside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + ) + result = s.interpolate(method="pad", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + def test_interp_limit_area_with_backfill(self): + # Test for issue #26796 + s = Series( + [np.nan, 
np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series( + [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="bfill", limit_area="inside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="bfill", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series( + [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) + result = s.interpolate(method="bfill", limit_area="outside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + ) + result = s.interpolate(method="bfill", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 3a191b9b1acd38ad975b60af762ebe06aac77def Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 17:57:27 +0100 Subject: [PATCH 02/37] Added implementation to support `limit_area` Since methods `pad` and `bfill` in `blocks.interpolate` end up using `missing.interpolate_2d` which can not (easily) be extended to support `limit_area`, I introduce the new function `missing.interpolate_1d_fill`. It is a modified copy of `interpolate_2d` but only works for 1d data and uses newly introduced function `_derive_indices_of_nans_to_preserve`, which is now also used in `missing.interpolate_1d`. It works the same way as the 1D-interpolation functions which are based on scipy-interpolation which are applied via np.apply_along_axis. 
--- pandas/core/internals/blocks.py | 36 ++++++-- pandas/core/missing.py | 146 ++++++++++++++++++++++++-------- 2 files changed, 140 insertions(+), 42 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5fe5290fa65f1..b47fa6130db39 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1116,6 +1116,7 @@ def check_int_bool(self, inplace): axis=axis, inplace=inplace, limit=limit, + limit_area=limit_area, fill_value=fill_value, coerce=coerce, downcast=downcast, @@ -1146,6 +1147,7 @@ def _interpolate_with_fill( axis=0, inplace=False, limit=None, + limit_area=None, fill_value=None, coerce=False, downcast=None, @@ -1168,14 +1170,32 @@ def _interpolate_with_fill( # We only get here for non-ExtensionBlock fill_value = convert_scalar(self.values, fill_value) - values = missing.interpolate_2d( - values, - method=method, - axis=axis, - limit=limit, - fill_value=fill_value, - dtype=self.dtype, - ) + # We have to distinguish two cases: + # 1. When kwarg `limit_area` is used: It is not + # supported by `missing.interpolate_2d()`. Using this kwarg only + # works by applying the fill along a certain axis. + # 2. All other cases: Then, `missing.interpolate_2d()` can be used. 
+ if limit_area is not None: + def func(x): + return missing.interpolate_1d_fill( + x, + method=method, + axis=axis, + limit=limit, + limit_area=limit_area, + fill_value=fill_value, + dtype=self.dtype, + ) + interp_values = np.apply_along_axis(func, axis, values) + else: + values = missing.interpolate_2d( + values, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=self.dtype, + ) blocks = [self.make_block_same_class(values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index b30a7a24f3495..709ee1464ccfe 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -222,40 +222,14 @@ def interpolate_1d( # default limit is unlimited GH #16282 limit = algos._validate_limit(nobs=None, limit=limit) - # These are sets of index pointers to invalid values... i.e. {0, 1, etc... - all_nans = set(np.flatnonzero(invalid)) - start_nans = set(range(find_valid_index(yvalues, "first"))) - end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) - mid_nans = all_nans - start_nans - end_nans - - # Like the sets above, preserve_nans contains indices of invalid values, - # but in this case, it is the final set of indices that need to be - # preserved as NaN after the interpolation. - - # For example if limit_direction='forward' then preserve_nans will - # contain indices of NaNs at the beginning of the series, and NaNs that - # are more than'limit' away from the prior non-NaN. - - # set preserve_nans based on direction using _interp_limit - if limit_direction == "forward": - preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) - elif limit_direction == "backward": - preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) - else: - # both directions... 
just use _interp_limit - preserve_nans = set(_interp_limit(invalid, limit, limit)) - - # if limit_area is set, add either mid or outside indices - # to preserve_nans GH #16284 - if limit_area == "inside": - # preserve NaNs on the outside - preserve_nans |= start_nans | end_nans - elif limit_area == "outside": - # preserve NaNs on the inside - preserve_nans |= mid_nans - - # sort preserve_nans and covert to list - preserve_nans = sorted(preserve_nans) + preserve_nans = _derive_indices_of_nans_to_preserve( + yvalues=yvalues, + valid=valid, + invalid=invalid, + limit=limit, + limit_area=limit_area, + limit_direction=limit_direction, + ) xvalues = getattr(xvalues, "values", xvalues) yvalues = getattr(yvalues, "values", yvalues) @@ -313,6 +287,51 @@ def interpolate_1d( result[preserve_nans] = np.nan return result +def _derive_indices_of_nans_to_preserve( + yvalues, valid, invalid, limit, limit_area, limit_direction, +): + """ Derive the indices of NaNs that shall be preserved after interpolation + This function is called by `interpolate_1d` and takes the arguments with + the same name from there. In `interpolate_1d`, after performing the + interpolation the list of indices of NaNs to preserve is used to put + NaNs in the desired locations. + """ + + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... + all_nans = set(np.flatnonzero(invalid)) + start_nans = set(range(find_valid_index(yvalues, "first"))) + end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) + mid_nans = all_nans - start_nans - end_nans + + # Like the sets above, preserve_nans contains indices of invalid values, + # but in this case, it is the final set of indices that need to be + # preserved as NaN after the interpolation. + + # For example if limit_direction='forward' then preserve_nans will + # contain indices of NaNs at the beginning of the series, and NaNs that + # are more than'limit' away from the prior non-NaN. 
+ + # set preserve_nans based on direction using _interp_limit + if limit_direction == "forward": + preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) + elif limit_direction == "backward": + preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) + else: + # both directions... just use _interp_limit + preserve_nans = set(_interp_limit(invalid, limit, limit)) + + # if limit_area is set, add either mid or outside indices + # to preserve_nans GH #16284 + if limit_area == "inside": + # preserve NaNs on the outside + preserve_nans |= start_nans | end_nans + elif limit_area == "outside": + # preserve NaNs on the inside + preserve_nans |= mid_nans + + # sort preserve_nans and covert to list + preserve_nans = sorted(preserve_nans) + return preserve_nans def _interpolate_scipy_wrapper( x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs @@ -477,6 +496,65 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): else: return [P(x, nu) for nu in der] +def interpolate_1d_fill( + values, + method="pad", + axis=0, + limit=None, + limit_area=None, + fill_value=None, + dtype=None, +): + """ + This is a 1D-versoin of `interpolate_2d`, which is used for methods `pad` + and `backfill` when interpolating. This 1D-version is necessary to be + able to handle kwarg `limit_area` via the function + ` _derive_indices_of_nans_to_preserve`. It is used the same way as the + 1D-interpolation functions which are based on scipy-interpolation, i.e. + via np.apply_along_axis. 
+ """ + if method == "pad": + limit_direction = "forward" + elif method == "backfill": + limit_direction = "backward" + else: + raise ValueError("`method` must be either 'pad' or 'backfill'.") + + orig_values = values + + yvalues = values + invalid = isna(yvalues) + valid = ~invalid + + if values.ndim > 1: + raise AssertionError("This only works with 1D data.") + + if fill_value is None: + mask = None + else: # todo create faster fill func without masking + mask = mask_missing(values, fill_value) + + preserve_nans = _derive_indices_of_nans_to_preserve( + yvalues=yvalues, + valid=valid, + invalid=invalid, + limit=limit, + limit_area=limit_area, + limit_direction=limit_direction, + ) + + method = clean_fill_method(method) + if method == "pad": + values = pad_1d(values, limit=limit, mask=mask, dtype=dtype) + else: + values = backfill_1d(values, limit=limit, mask=mask, dtype=dtype) + + if orig_values.dtype.kind == "M": + # convert float back to datetime64 + values = values.astype(orig_values.dtype) + + values[preserve_nans] = fill_value + return values def interpolate_2d( values, method="pad", axis=0, limit=None, fill_value=None, dtype=None From fd5d8e845827b185acd387b262041f7ec9d2434e Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 18:01:36 +0100 Subject: [PATCH 03/37] fix test --- pandas/tests/series/test_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 80d66919a7776..8a45bd5c90a3f 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1431,7 +1431,7 @@ def test_interp_limit_area_with_backfill(self): tm.assert_series_equal(result, expected) expected = Series( - [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan] ) result = s.interpolate(method="bfill", limit_area="outside", limit=1) tm.assert_series_equal(result, expected) 
From 26d88ed1a642b73bd54b7a87b9d55ae5f912a8d1 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 22:06:00 +0100 Subject: [PATCH 04/37] pep8 --- pandas/core/missing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 709ee1464ccfe..d324143c9a450 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -287,6 +287,7 @@ def interpolate_1d( result[preserve_nans] = np.nan return result + def _derive_indices_of_nans_to_preserve( yvalues, valid, invalid, limit, limit_area, limit_direction, ): @@ -333,6 +334,7 @@ def _derive_indices_of_nans_to_preserve( preserve_nans = sorted(preserve_nans) return preserve_nans + def _interpolate_scipy_wrapper( x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs ): @@ -496,6 +498,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): else: return [P(x, nu) for nu in der] + def interpolate_1d_fill( values, method="pad", @@ -556,6 +559,7 @@ def interpolate_1d_fill( values[preserve_nans] = fill_value return values + def interpolate_2d( values, method="pad", axis=0, limit=None, fill_value=None, dtype=None ): From 6597acae03ccb1fdd9beb71ad0aa356f0c7fb91d Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 22:07:27 +0100 Subject: [PATCH 05/37] fixed small error that actually had no effect since the input array `values` also was changed via applying `func` --- pandas/core/internals/blocks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b47fa6130db39..5426120ff3bbb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1186,9 +1186,10 @@ def func(x): fill_value=fill_value, dtype=self.dtype, ) + # Beware that this also change the input array `values`! 
interp_values = np.apply_along_axis(func, axis, values) else: - values = missing.interpolate_2d( + interp_values = missing.interpolate_2d( values, method=method, axis=axis, @@ -1197,7 +1198,7 @@ def func(x): dtype=self.dtype, ) - blocks = [self.make_block_same_class(values, ndim=self.ndim)] + blocks = [self.make_block_same_class(interp_values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) def _interpolate( From c536d3c1340cce275a06421e33c85af04f047225 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 22:32:51 +0100 Subject: [PATCH 06/37] Raise when forbidden combination of `method` and `limit_direction` are used Test for all forbidden combos of `pad` and `backfill` is included --- pandas/core/generic.py | 25 ++++++++++++++++++++++++- pandas/tests/series/test_missing.py | 20 ++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0c413cd473bbc..e9c1cd06d557f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6680,7 +6680,7 @@ def interpolate( axis=0, limit=None, inplace=False, - limit_direction="forward", + limit_direction=None, limit_area=None, downcast=None, **kwargs, @@ -6720,6 +6720,29 @@ def interpolate( "column to a numeric dtype." 
) + # Set `limit_direction` depending on `method` + if (method == "pad") or (method == "ffill"): + if (limit_direction == "backward") or ( + limit_direction == "both"): + raise ValueError( + f"`limit_direction` must not be `{limit_direction}` " + f"for method `{method}`" + ) + else: + limit_direction = "forward" + elif (method == "backfill") or (method == "bfill"): + if (limit_direction == "forward") or (limit_direction == "both"): + raise ValueError( + f"`limit_direction` must not be `{limit_direction}` " + f"for method `{method}`" + ) + else: + limit_direction = "backward" + else: + # Set default + if limit_direction is None: + limit_direction = "forward" + # create/use the index if method == "linear": # prior default diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8a45bd5c90a3f..39657eab8cb61 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1436,6 +1436,26 @@ def test_interp_limit_area_with_backfill(self): result = s.interpolate(method="bfill", limit_area="outside", limit=1) tm.assert_series_equal(result, expected) + def test_interp_raise_limit_direction_and_pad_or_bfill(self): + s = Series([1, 2, 3]) + forbidden_combinations = [ + ("pad", "backward"), + ("ffill", "backward"), + ("backfill", "forward"), + ("bfill", "forward"), + ("pad", "both"), + ("ffill", "both"), + ("backfill", "both"), + ("bfill", "both"), + ] + + for method, limit_direction in forbidden_combinations: + msg = ( + f"`limit_direction` must not be `{limit_direction}` " + f"for method `{method}`" + ) + with pytest.raises(ValueError, match=msg): + s.interpolate(method=method, limit_direction=limit_direction) def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. 
From ed9cf219529ac5cc305f77d89060fc8722b79c06 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 22:35:59 +0100 Subject: [PATCH 07/37] Updated docstring with info about allowed combinations of `method` and `limit_direction` --- pandas/core/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e9c1cd06d557f..929bbde7fb530 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6527,9 +6527,11 @@ def replace( 0. inplace : bool, default False Update the data in place if possible. - limit_direction : {'forward', 'backward', 'both'}, default 'forward' + limit_direction : {'forward', 'backward', 'both'}, default is None If limit is specified, consecutive NaNs will be filled in this - direction. + direction. If the methods 'pad' or 'ffill' are used it must be + None or 'forward'. If 'backfill' or 'bfill' are use it must be + None or 'backwards'. limit_area : {`None`, 'inside', 'outside'}, default None If limit is specified, consecutive NaNs will be filled with this restriction. 
From 298032561d920594eb2f672feff44dda2fdef408 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 22:41:29 +0100 Subject: [PATCH 08/37] clean up * black formatting * typo --- pandas/core/generic.py | 3 +-- pandas/core/internals/blocks.py | 4 +++- pandas/core/missing.py | 2 +- pandas/tests/series/test_missing.py | 18 ++++++------------ 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 929bbde7fb530..5dba69869dda3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6724,8 +6724,7 @@ def interpolate( # Set `limit_direction` depending on `method` if (method == "pad") or (method == "ffill"): - if (limit_direction == "backward") or ( - limit_direction == "both"): + if (limit_direction == "backward") or (limit_direction == "both"): raise ValueError( f"`limit_direction` must not be `{limit_direction}` " f"for method `{method}`" diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5426120ff3bbb..426f0b8356712 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1176,6 +1176,7 @@ def _interpolate_with_fill( # works by applying the fill along a certain axis. # 2. All other cases: Then, `missing.interpolate_2d()` can be used. if limit_area is not None: + def func(x): return missing.interpolate_1d_fill( x, @@ -1186,7 +1187,8 @@ def func(x): fill_value=fill_value, dtype=self.dtype, ) - # Beware that this also change the input array `values`! + + # Beware that this also changes the input array `values`! 
interp_values = np.apply_along_axis(func, axis, values) else: interp_values = missing.interpolate_2d( diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d324143c9a450..f246b9c1c8ff9 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -289,7 +289,7 @@ def interpolate_1d( def _derive_indices_of_nans_to_preserve( - yvalues, valid, invalid, limit, limit_area, limit_direction, + yvalues, valid, invalid, limit, limit_area, limit_direction ): """ Derive the indices of NaNs that shall be preserved after interpolation This function is called by `interpolate_1d` and takes the arguments with diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 39657eab8cb61..3aebef2c01a86 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1384,11 +1384,9 @@ def test_interp_limit_area(self): def test_interp_limit_area_with_pad(self): # Test for issue #26796 - s = Series( - [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - expected = Series( - [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) + expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="pad", limit_area="inside") tm.assert_series_equal(result, expected) @@ -1398,8 +1396,7 @@ def test_interp_limit_area_with_pad(self): result = s.interpolate(method="pad", limit_area="inside", limit=1) tm.assert_series_equal(result, expected) - expected = Series( - [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) + expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) result = s.interpolate(method="pad", limit_area="outside") tm.assert_series_equal(result, expected) @@ -1411,11 +1408,9 @@ def test_interp_limit_area_with_pad(self): def test_interp_limit_area_with_backfill(self): # Test for issue #26796 - s = Series( - [np.nan, np.nan, 3, np.nan, 
np.nan, np.nan, 7, np.nan, np.nan]) + s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - expected = Series( - [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) + expected = Series([np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="bfill", limit_area="inside") tm.assert_series_equal(result, expected) @@ -1425,8 +1420,7 @@ def test_interp_limit_area_with_backfill(self): result = s.interpolate(method="bfill", limit_area="inside", limit=1) tm.assert_series_equal(result, expected) - expected = Series( - [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) + expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) result = s.interpolate(method="bfill", limit_area="outside") tm.assert_series_equal(result, expected) From ecf428e9f8ecacf3e7c3fe9e30abbc8e67d45d50 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 22:55:50 +0100 Subject: [PATCH 09/37] Added entry to whatsnew file --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c423933d4c438..0f293cdc6bf2b 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1040,7 +1040,7 @@ Indexing Missing ^^^^^^^ -- +- Bug in :meth:`Series.interpolate` where kwarg ``limit_area`` and ``limit_direction`` had no effect when using methods ``pad`` and `backfill`` (:issue:`31048`) - MultiIndex From f8a342375229ae13d2cc40042b4ea80552f06316 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 23:10:59 +0100 Subject: [PATCH 10/37] Removed `axis` kwarg from `interpolate_1d_fill` because it was unused --- pandas/core/internals/blocks.py | 1 - pandas/core/missing.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 426f0b8356712..b40477be64034 100644 --- 
a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1181,7 +1181,6 @@ def func(x): return missing.interpolate_1d_fill( x, method=method, - axis=axis, limit=limit, limit_area=limit_area, fill_value=fill_value, diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f246b9c1c8ff9..0fb660a99600a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -502,7 +502,6 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): def interpolate_1d_fill( values, method="pad", - axis=0, limit=None, limit_area=None, fill_value=None, From 67331861419d5b9a9ca5fb8a6adcb1725199531e Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 23:18:45 +0100 Subject: [PATCH 11/37] Type annotations added to new function `interpolate_1d_fill` --- pandas/core/missing.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 0fb660a99600a..b946c9bdca716 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -19,6 +19,7 @@ needs_i8_conversion, ) from pandas.core.dtypes.missing import isna +from pandas._typing import Optional, Hashable, Dtype def mask_missing(arr, values_to_mask): @@ -501,11 +502,11 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): def interpolate_1d_fill( values, - method="pad", - limit=None, - limit_area=None, - fill_value=None, - dtype=None, + method: str = "pad", + limit: Optional[int] = None, + limit_area: Optional[str] = None, + fill_value: Optional[Hashable] = None, + dtype: Optional[Dtype] = None, ): """ This is a 1D-versoin of `interpolate_2d`, which is used for methods `pad` From c5b77d24b6566b1c0d06d6872eb0116ca0d45384 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 15 Jan 2020 23:46:09 +0100 Subject: [PATCH 12/37] fixed incorrectly sorted imports --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 
b946c9bdca716..06cbcde1aa127 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,6 +5,7 @@ import numpy as np from pandas._libs import algos, lib +from pandas._typing import Dtype, Hashable, Optional from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -19,7 +20,6 @@ needs_i8_conversion, ) from pandas.core.dtypes.missing import isna -from pandas._typing import Optional, Hashable, Dtype def mask_missing(arr, values_to_mask): From 0bb36de260e7e46b74a6bb7340be9e00eec55bf0 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 5 Feb 2020 19:56:51 +0100 Subject: [PATCH 13/37] Added type annotation, updated docstring and removed unnecessary arguments Tests on my local machine are not affected by removing the unnecessary arguments `valid` and `invalid`, which are now derived within the function. --- pandas/core/missing.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 06cbcde1aa127..7572cef90e49c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import algos, lib -from pandas._typing import Dtype, Hashable, Optional +from pandas._typing import ArrayLike, Dtype, Hashable, Optional from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -225,8 +225,6 @@ def interpolate_1d( preserve_nans = _derive_indices_of_nans_to_preserve( yvalues=yvalues, - valid=valid, - invalid=invalid, limit=limit, limit_area=limit_area, limit_direction=limit_direction, @@ -290,15 +288,35 @@ def interpolate_1d( def _derive_indices_of_nans_to_preserve( - yvalues, valid, invalid, limit, limit_area, limit_direction + yvalues: ArrayLike, + limit: int, + limit_area: str, + limit_direction: str, ): - """ Derive the indices of NaNs that shall be preserved after interpolation + 
""" + Derive the indices of NaNs that shall be preserved after interpolation This function is called by `interpolate_1d` and takes the arguments with the same name from there. In `interpolate_1d`, after performing the - interpolation the list of indices of NaNs to preserve is used to put + interpolation, the list of indices of NaNs to preserve is used to put NaNs in the desired locations. + + Parameters + ---------- + yvalues: ArrayLike + 1-d array of values of the initial Series or DataFrame + limit: int + limit_area: str + limit_direction: str + + Returns + ------- + preserve_nans: set + Set of index pointers to where NaNs should be preserved in `yvalues` """ + invalid = isna(yvalues) + valid = ~invalid + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) start_nans = set(range(find_valid_index(yvalues, "first"))) @@ -539,8 +557,6 @@ def interpolate_1d_fill( preserve_nans = _derive_indices_of_nans_to_preserve( yvalues=yvalues, - valid=valid, - invalid=invalid, limit=limit, limit_area=limit_area, limit_direction=limit_direction, From a467afdd4afc7b0753fbad7e2fa8ff2193f8f693 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 18 Feb 2020 20:41:52 +0100 Subject: [PATCH 14/37] Reverting docstring entry for default value of `limit_direction` --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5dba69869dda3..9a02b64ad44bd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6527,7 +6527,7 @@ def replace( 0. inplace : bool, default False Update the data in place if possible. - limit_direction : {'forward', 'backward', 'both'}, default is None + limit_direction : {'forward', 'backward', 'both'}, default is 'forward' If limit is specified, consecutive NaNs will be filled in this direction. If the methods 'pad' or 'ffill' are used it must be None or 'forward'. 
If 'backfill' or 'bfill' are use it must be From 5466d8c51c955f153b681444e047cc2674893553 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 18 Feb 2020 20:48:09 +0100 Subject: [PATCH 15/37] Moved logic for calling `missing.interpolate_1d_fill` to `missing.interpolate_2d` --- pandas/core/internals/blocks.py | 37 ++++------------ pandas/core/missing.py | 76 ++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 53 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b40477be64034..0956c612ae066 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1170,34 +1170,15 @@ def _interpolate_with_fill( # We only get here for non-ExtensionBlock fill_value = convert_scalar(self.values, fill_value) - # We have to distinguish two cases: - # 1. When kwarg `limit_area` is used: It is not - # supported by `missing.interpolate_2d()`. Using this kwarg only - # works by applying the fill along a certain axis. - # 2. All other cases: Then, `missing.interpolate_2d()` can be used. - if limit_area is not None: - - def func(x): - return missing.interpolate_1d_fill( - x, - method=method, - limit=limit, - limit_area=limit_area, - fill_value=fill_value, - dtype=self.dtype, - ) - - # Beware that this also changes the input array `values`! 
- interp_values = np.apply_along_axis(func, axis, values) - else: - interp_values = missing.interpolate_2d( - values, - method=method, - axis=axis, - limit=limit, - fill_value=fill_value, - dtype=self.dtype, - ) + interp_values = missing.interpolate_2d( + values, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + limit_area=limit_area, + dtype=self.dtype, + ) blocks = [self.make_block_same_class(interp_values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7572cef90e49c..706bc07ff94ad 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -577,7 +577,13 @@ def interpolate_1d_fill( def interpolate_2d( - values, method="pad", axis=0, limit=None, fill_value=None, dtype=None + values, + method="pad", + axis=0, + limit=None, + fill_value=None, + limit_area=None, + dtype=None, ): """ Perform an actual interpolation of values, values will be make 2-d if @@ -585,35 +591,55 @@ def interpolate_2d( """ orig_values = values - transf = (lambda x: x) if axis == 0 else (lambda x: x.T) - - # reshape a 1 dim if needed - ndim = values.ndim - if values.ndim == 1: - if axis != 0: # pragma: no cover - raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") - values = values.reshape(tuple((1,) + values.shape)) + # We have to distinguish two cases: + # 1. When kwarg `limit_area` is used: It is not + # supported by `pad_2d` and `backfill_2d`. Using this kwarg only + # works by applying the fill along a certain axis. + # 2. All other cases. 
+ if limit_area is not None: - if fill_value is None: - mask = None - else: # todo create faster fill func without masking - mask = mask_missing(transf(values), fill_value) + def func(x): + return interpolate_1d_fill( + x, + method=method, + limit=limit, + limit_area=limit_area, + fill_value=fill_value, + dtype=dtype, + ) - method = clean_fill_method(method) - if method == "pad": - values = transf(pad_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) + # Beware that this also changes the input array `values`! + values = np.apply_along_axis(func, axis, values) else: - values = transf( - backfill_2d(transf(values), limit=limit, mask=mask, dtype=dtype) - ) + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) + + # reshape a 1 dim if needed + ndim = values.ndim + if values.ndim == 1: + if axis != 0: # pragma: no cover + raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") + values = values.reshape(tuple((1,) + values.shape)) + + if fill_value is None: + mask = None + else: # todo create faster fill func without masking + mask = mask_missing(transf(values), fill_value) + + method = clean_fill_method(method) + if method == "pad": + values = transf(pad_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) + else: + values = transf( + backfill_2d(transf(values), limit=limit, mask=mask, dtype=dtype) + ) - # reshape back - if ndim == 1: - values = values[0] + # reshape back + if ndim == 1: + values = values[0] - if orig_values.dtype.kind == "M": - # convert float back to datetime64 - values = values.astype(orig_values.dtype) + if orig_values.dtype.kind == "M": + # convert float back to datetime64 + values = values.astype(orig_values.dtype) return values From 3e968fc2eac5a6a88e1689ba853936ef7a03d310 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 18 Feb 2020 20:51:35 +0100 Subject: [PATCH 16/37] Moved whatsnew entry to v1.1.0.rst --- doc/source/whatsnew/v1.0.0.rst | 2 +- doc/source/whatsnew/v1.1.0.rst | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0f293cdc6bf2b..c423933d4c438 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1040,7 +1040,7 @@ Indexing Missing ^^^^^^^ -- Bug in :meth:`Series.interpolate` where kwarg ``limit_area`` and ``limit_direction`` had no effect when using methods ``pad`` and `backfill`` (:issue:`31048`) +- - MultiIndex diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 721bcb0758992..069bc251b90cd 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -108,7 +108,7 @@ Indexing Missing ^^^^^^^ -- +- Bug in :meth:`Series.interpolate` where kwarg ``limit_area`` and ``limit_direction`` had no effect when using methods ``pad`` and `backfill`` (:issue:`31048`) - MultiIndex From 556a3cf4238556ed8510b99a7c5c392c1bc4c04b Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 18 Feb 2020 20:54:59 +0100 Subject: [PATCH 17/37] clean up -black formating -remove variables that are now obsolete --- pandas/core/missing.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 706bc07ff94ad..de5d1cfc5aca1 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -288,10 +288,7 @@ def interpolate_1d( def _derive_indices_of_nans_to_preserve( - yvalues: ArrayLike, - limit: int, - limit_area: str, - limit_direction: str, + yvalues: ArrayLike, limit: int, limit_area: str, limit_direction: str ): """ Derive the indices of NaNs that shall be preserved after interpolation @@ -544,8 +541,6 @@ def interpolate_1d_fill( orig_values = values yvalues = values - invalid = isna(yvalues) - valid = ~invalid if values.ndim > 1: raise AssertionError("This only works with 1D data.") From 6c1e429f73ede97b047d3f691d6666040f24439c Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 18 Feb 2020 22:02:28 +0100 Subject: [PATCH 18/37] 
fixed missing Optional in type definition --- pandas/core/missing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index de5d1cfc5aca1..9192f9708d981 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -288,7 +288,10 @@ def interpolate_1d( def _derive_indices_of_nans_to_preserve( - yvalues: ArrayLike, limit: int, limit_area: str, limit_direction: str + yvalues: ArrayLike, + limit: Optional[int] = None, + limit_area: Optional[str] = None, + limit_direction: Optional[str] = None, ): """ Derive the indices of NaNs that shall be preserved after interpolation From 767b0ca94e6c4e219d1d50dee2a2aaba373dc13e Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 17 Mar 2020 00:16:36 +0100 Subject: [PATCH 19/37] small fix so that CI type validation does not complain --- pandas/core/missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 9192f9708d981..07123cc6d8c8f 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -350,8 +350,8 @@ def _derive_indices_of_nans_to_preserve( preserve_nans |= mid_nans # sort preserve_nans and covert to list - preserve_nans = sorted(preserve_nans) - return preserve_nans + preserve_nans_sorted = sorted(preserve_nans) + return preserve_nans_sorted def _interpolate_scipy_wrapper( From 26ef7b53f79a710cbd4e03a387f0b93caa58ee39 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Thu, 19 Mar 2020 10:52:25 +0100 Subject: [PATCH 20/37] Apply suggestions from code review concerning list instead of set Co-Authored-By: William Ayd --- pandas/core/missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d97b07b448743..72c9195d838be 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -292,7 +292,7 @@ def _derive_indices_of_nans_to_preserve( limit: Optional[int] = None, limit_area: 
Optional[str] = None, limit_direction: Optional[str] = None, -): +) -> List[int]: """ Derive the indices of NaNs that shall be preserved after interpolation This function is called by `interpolate_1d` and takes the arguments with @@ -310,7 +310,7 @@ def _derive_indices_of_nans_to_preserve( Returns ------- - preserve_nans: set + preserve_nans: list of int Set of index pointers to where NaNs should be preserved in `yvalues` """ From b4b6b5a8a1b737952f4d7137834e971ad4f4a30b Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Thu, 19 Mar 2020 11:09:35 +0100 Subject: [PATCH 21/37] added import for missing List type --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 72c9195d838be..0db2be1a24655 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import algos, lib -from pandas._typing import ArrayLike, Dtype, Hashable, Optional +from pandas._typing import ArrayLike, Dtype, Hashable, Optional, List from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array From e259549e2f369bd07a731681d1278e1e5d605007 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Thu, 19 Mar 2020 11:31:32 +0100 Subject: [PATCH 22/37] fixed unsorted order of imports --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 0db2be1a24655..d7d6f6a2dab3d 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import algos, lib -from pandas._typing import ArrayLike, Dtype, Hashable, Optional, List +from pandas._typing import ArrayLike, Dtype, Hashable, List, Optional from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array From 7c5ad7d9f838479cee5c5514d9bb9f8581c1a98f 
Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 25 Aug 2020 22:33:05 +0200 Subject: [PATCH 23/37] Added tests back in Tests should be red now --- .../tests/series/methods/test_interpolate.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index c4b10e0ccdc3e..46c727ecf16ad 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -450,6 +450,60 @@ def test_interp_limit_direction_raises(self, method, limit_direction, expected): with pytest.raises(ValueError, match=msg): s.interpolate(method=method, limit_direction=limit_direction) + def test_interp_limit_area_with_pad(self): + # Test for issue #26796 + s = Series( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series( + [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="pad", limit_area="inside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="pad", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) + result = s.interpolate(method="pad", limit_area="outside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + ) + result = s.interpolate(method="pad", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + def test_interp_limit_area_with_backfill(self): + # Test for issue #26796 + s = Series( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series( + [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="bfill", limit_area="inside") + 
tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="bfill", limit_area="inside", limit=1) + tm.assert_series_equal(result, expected) + + expected = Series( + [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) + result = s.interpolate(method="bfill", limit_area="outside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="bfill", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 92148ff8d3cc7d3cf431262bbb5f61bd93df2202 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 26 Aug 2020 22:25:50 +0200 Subject: [PATCH 24/37] Added new solution to account for limit_area with pad provided by https://github.com/pandas-dev/pandas/pull/34749 Test are green now --- pandas/core/internals/blocks.py | 36 ++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c62be4f767f00..1b9322d5049c5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +import functools import inspect import re from typing import TYPE_CHECKING, Any, List, Optional @@ -1127,6 +1128,7 @@ def interpolate( axis=axis, inplace=inplace, limit=limit, + limit_area=limit_area, fill_value=fill_value, coerce=coerce, downcast=downcast, @@ -1155,6 +1157,7 @@ def _interpolate_with_fill( axis: int = 0, inplace: bool = False, limit: Optional[int] = None, + limit_area: Optional[str] = None, fill_value: Optional[Any] = None, coerce: bool = False, downcast: Optional[str] = None, @@ -1176,15 +1179,42 @@ def 
_interpolate_with_fill( # We only get here for non-ExtensionBlock fill_value = convert_scalar_for_putitemlike(fill_value, self.values.dtype) - values = missing.interpolate_2d( - values, + interpolate_2d = functools.partial( + missing.interpolate_2d, method=method, - axis=axis, limit=limit, fill_value=fill_value, dtype=self.dtype, ) + if limit_area is None: + values = interpolate_2d(values, axis=axis) + else: + def func(values): + invalid = isna(values) + + if not invalid.any(): + return values + + if not invalid.all(): + first = missing.find_valid_index(values, "first") + last = missing.find_valid_index(values, "last") + + values = interpolate_2d(values) + + if limit_area == "inside": + invalid[first : last + 1] = False + elif limit_area == "outside": + invalid[:first] = False + invalid[last + 1 :] = False + + values[invalid] = np.nan + else: + values = interpolate_2d(values) + return values + + values = np.apply_along_axis(func, axis, values) + blocks = [self.make_block_same_class(values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) From d62e02ed2c05710029db0c577582bbca9e125b88 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 26 Aug 2020 23:12:59 +0200 Subject: [PATCH 25/37] black formatting --- pandas/core/internals/blocks.py | 1 + .../tests/series/methods/test_interpolate.py | 18 ++++++------------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1b9322d5049c5..783df00137753 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1190,6 +1190,7 @@ def _interpolate_with_fill( if limit_area is None: values = interpolate_2d(values, axis=axis) else: + def func(values): invalid = isna(values) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 46c727ecf16ad..cff6d601a8cd8 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ 
b/pandas/tests/series/methods/test_interpolate.py @@ -452,11 +452,9 @@ def test_interp_limit_direction_raises(self, method, limit_direction, expected): def test_interp_limit_area_with_pad(self): # Test for issue #26796 - s = Series( - [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - expected = Series( - [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) + expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="pad", limit_area="inside") tm.assert_series_equal(result, expected) @@ -466,8 +464,7 @@ def test_interp_limit_area_with_pad(self): result = s.interpolate(method="pad", limit_area="inside", limit=1) tm.assert_series_equal(result, expected) - expected = Series( - [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) + expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) result = s.interpolate(method="pad", limit_area="outside") tm.assert_series_equal(result, expected) @@ -479,11 +476,9 @@ def test_interp_limit_area_with_pad(self): def test_interp_limit_area_with_backfill(self): # Test for issue #26796 - s = Series( - [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - expected = Series( - [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) + expected = Series([np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="bfill", limit_area="inside") tm.assert_series_equal(result, expected) @@ -493,8 +488,7 @@ def test_interp_limit_area_with_backfill(self): result = s.interpolate(method="bfill", limit_area="inside", limit=1) tm.assert_series_equal(result, expected) - expected = Series( - [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) + expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) result = 
s.interpolate(method="bfill", limit_area="outside") tm.assert_series_equal(result, expected) From c2473f29fc892a7f1c9666d17c05607ab893a669 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 26 Aug 2020 23:32:14 +0200 Subject: [PATCH 26/37] added whatsnew entry --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index adc1806523d6e..503c3c37790fd 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -225,6 +225,7 @@ Missing ^^^^^^^ - Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`) +- Bug in :meth:`Series.interpolate` where kwarg ``limit_area`` and ``limit_direction`` had no effect when using methods ``pad`` and ``backfill`` (:issue:`31048`) - MultiIndex From 610e3471033cf34833de2ef27ae60a2a79804a3c Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Mon, 14 Sep 2020 23:01:10 +0200 Subject: [PATCH 27/37] Test with moving logic for interpolate_2d with `limite_area` directly into interpolate_2d --- pandas/core/internals/blocks.py | 36 +++--------------------- pandas/core/missing.py | 49 ++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 33 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 783df00137753..1d4569d4d0f53 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,5 +1,4 @@ from datetime import datetime, timedelta -import functools import inspect import re from typing import TYPE_CHECKING, Any, List, Optional @@ -1179,43 +1178,16 @@ def _interpolate_with_fill( # We only get here for non-ExtensionBlock fill_value = convert_scalar_for_putitemlike(fill_value, self.values.dtype) - interpolate_2d = functools.partial( - missing.interpolate_2d, + values = missing.interpolate_2d( + values, method=method, + axis=axis, limit=limit, + limit_area=limit_area, 
fill_value=fill_value, dtype=self.dtype, ) - if limit_area is None: - values = interpolate_2d(values, axis=axis) - else: - - def func(values): - invalid = isna(values) - - if not invalid.any(): - return values - - if not invalid.all(): - first = missing.find_valid_index(values, "first") - last = missing.find_valid_index(values, "last") - - values = interpolate_2d(values) - - if limit_area == "inside": - invalid[first : last + 1] = False - elif limit_area == "outside": - invalid[:first] = False - invalid[last + 1 :] = False - - values[invalid] = np.nan - else: - values = interpolate_2d(values) - return values - - values = np.apply_along_axis(func, axis, values) - blocks = [self.make_block_same_class(values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7802c5cbdbfb3..ef8da603339cb 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -543,12 +543,59 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat def interpolate_2d( - values, method="pad", axis=0, limit=None, fill_value=None, dtype=None + values, + method="pad", + axis=0, + limit=None, + limit_area=None, + fill_value=None, + dtype=None, ): """ Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. 
""" + + if limit_area is not None: + + def func(values): + invalid = isna(values) + + if not invalid.any(): + return values + + if not invalid.all(): + first = find_valid_index(values, "first") + last = find_valid_index(values, "last") + + values = interpolate_2d( + values, + method=method, + limit=limit, + fill_value=fill_value, + dtype=dtype, + ) + + if limit_area == "inside": + invalid[first : last + 1] = False + elif limit_area == "outside": + invalid[:first] = False + invalid[last + 1 :] = False + + values[invalid] = np.nan + else: + values = interpolate_2d( + values, + method=method, + limit=limit, + fill_value=fill_value, + dtype=dtype, + ) + return values + + values = np.apply_along_axis(func, axis, values) + return values + orig_values = values transf = (lambda x: x) if axis == 0 else (lambda x: x.T) From 570e3c20f2f6af9b144f72f8b388214615b28575 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 15 Sep 2020 14:48:32 +0200 Subject: [PATCH 28/37] fix wrong arg order by using kwargs as suggested in https://github.com/pandas-dev/pandas/commit/75f3804982d912acef078e9bb5ec43806f596954 --- pandas/core/arrays/categorical.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a28b341669918..9c856e1da9f3d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1693,9 +1693,9 @@ def fillna(self, value=None, method=None, limit=None): # TODO: dispatch when self.categories is EA-dtype values = np.asarray(self).reshape(-1, len(self)) - values = interpolate_2d(values, method, 0, None, value).astype( - self.categories.dtype - )[0] + values = interpolate_2d( + values, method=method, axis=0, limit=None, fill_value=value, + ).astype(self.categories.dtype)[0] codes = _get_codes_for_values(values, self.categories) else: From 721304ab9b1993e3fa415bf70d4c842fee561d53 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 15 Sep 2020 
16:26:04 +0200 Subject: [PATCH 29/37] Added comment to explain recursion and added typing for interpolate_2d --- pandas/core/missing.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ef8da603339cb..851c3aae73a3b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -7,6 +7,7 @@ import numpy as np from pandas._libs import algos, lib +from pandas._typing import Axis, Dtype, Hashable from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -544,18 +545,21 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat def interpolate_2d( values, - method="pad", - axis=0, - limit=None, - limit_area=None, - fill_value=None, - dtype=None, + method: str = "pad", + axis: Axis = 0, + limit: Optional[str] = None, + limit_area: Optional[str] = None, + fill_value: Optional[Hashable] = None, + dtype: Optional[Dtype] = None, ): """ Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. """ + # `limit_area` is not supported by `pad_2d` and `backfill_2d`. Hence, the + # following code block does a recursive call and applies the interpolation + # and `limit_area` logic along a certain axis. 
if limit_area is not None: def func(values): From 73ab1bfdfa9359938d13fccd9d7b8ea7af519500 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 15 Sep 2020 16:44:22 +0200 Subject: [PATCH 30/37] improved test code coverage --- pandas/tests/series/methods/test_interpolate.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index cff6d601a8cd8..ae9908d81e6cb 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -474,6 +474,18 @@ def test_interp_limit_area_with_pad(self): result = s.interpolate(method="pad", limit_area="outside", limit=1) tm.assert_series_equal(result, expected) + # Test for all NaNs + s = Series([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]) + expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]) + result = s.interpolate(method="pad", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + + # Test for no NaNs + s = Series([1, 2, 3, 4]) + expected = Series([1, 2, 3, 4]) + result = s.interpolate(method="pad", limit_area="outside", limit=1) + tm.assert_series_equal(result, expected) + def test_interp_limit_area_with_backfill(self): # Test for issue #26796 s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) From a33f629c9bbad07814cae3e88504fd881628e041 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Tue, 15 Sep 2020 17:39:34 +0200 Subject: [PATCH 31/37] fixed wrong typing --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 851c3aae73a3b..ca9c225118792 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -547,7 +547,7 @@ def interpolate_2d( values, method: str = "pad", axis: Axis = 0, - limit: Optional[str] = None, + limit: Optional[int] = None, limit_area: Optional[str] = None, 
fill_value: Optional[Hashable] = None, dtype: Optional[Dtype] = None, From cb9d56a235db57f07b8da447b448964e2c21b16f Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 29 Nov 2020 23:49:42 -0500 Subject: [PATCH 32/37] move func to module level --- pandas/core/missing.py | 87 +++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index dd822c0d584fa..c347f9f892d72 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,13 +1,12 @@ """ Routines for filling missing data. """ - +from functools import partial from typing import Any, List, Optional, Set, Union import numpy as np from pandas._libs import algos, lib - from pandas._typing import ArrayLike, Axis, DtypeObj from pandas.compat._optional import import_optional_dependency @@ -529,6 +528,43 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat return P(x) +def _interpolate_with_limit_area(values, method, limit, limit_area): + + # `limit_area` is not supported by `pad_2d` and `backfill_2d`. Hence, the + # following code block does a recursive call and applies the interpolation + # and `limit_area` logic along a certain axis. + + invalid = isna(values) + + if not invalid.any(): + return values + + if not invalid.all(): + first = find_valid_index(values, "first") + last = find_valid_index(values, "last") + + values = interpolate_2d( + values, + method=method, + limit=limit, + ) + + if limit_area == "inside": + invalid[first : last + 1] = False + elif limit_area == "outside": + invalid[:first] = False + invalid[last + 1 :] = False + + values[invalid] = np.nan + else: + values = interpolate_2d( + values, + method=method, + limit=limit, + ) + return values + + def interpolate_2d( values, method: str = "pad", @@ -540,45 +576,18 @@ def interpolate_2d( Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. 
""" - - # `limit_area` is not supported by `pad_2d` and `backfill_2d`. Hence, the - # following code block does a recursive call and applies the interpolation - # and `limit_area` logic along a certain axis. if limit_area is not None: - def func(values): - invalid = isna(values) - - if not invalid.any(): - return values - - if not invalid.all(): - first = find_valid_index(values, "first") - last = find_valid_index(values, "last") - - values = interpolate_2d( - values, - method=method, - limit=limit, - ) - - if limit_area == "inside": - invalid[first : last + 1] = False - elif limit_area == "outside": - invalid[:first] = False - invalid[last + 1 :] = False - - values[invalid] = np.nan - else: - values = interpolate_2d( - values, - method=method, - limit=limit, - ) - return values - - values = np.apply_along_axis(func, axis, values) - return values + return np.apply_along_axis( + partial( + _interpolate_with_limit_area, + method=method, + limit=limit, + limit_area=limit_area, + ), + axis, + values, + ) orig_values = values From 3aa6efa57b48ccb6ad6b2732a2bbad4cea2c6d8f Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 29 Nov 2020 23:57:14 -0500 Subject: [PATCH 33/37] typing --- pandas/core/missing.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index c347f9f892d72..222ff344908e7 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -528,18 +528,18 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat return P(x) -def _interpolate_with_limit_area(values, method, limit, limit_area): - - # `limit_area` is not supported by `pad_2d` and `backfill_2d`. Hence, the - # following code block does a recursive call and applies the interpolation - # and `limit_area` logic along a certain axis. 
+def _interpolate_with_limit_area( + values, method: str, limit: Optional[int], limit_area: Optional[str] +): + """ + Apply interpolation and limit_area logic to values along a to-be-specified axis. + """ invalid = isna(values) if not invalid.any(): - return values - - if not invalid.all(): + pass + elif not invalid.all(): first = find_valid_index(values, "first") last = find_valid_index(values, "last") @@ -552,8 +552,7 @@ def _interpolate_with_limit_area(values, method, limit, limit_area): if limit_area == "inside": invalid[first : last + 1] = False elif limit_area == "outside": - invalid[:first] = False - invalid[last + 1 :] = False + invalid[:first] = invalid[last + 1 :] = False values[invalid] = np.nan else: @@ -577,7 +576,6 @@ def interpolate_2d( needed fills inplace, returns the result. """ if limit_area is not None: - return np.apply_along_axis( partial( _interpolate_with_limit_area, From 47064b6e1ea6ad567c914622e7fd60ccf226edbc Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 30 Nov 2020 00:20:10 -0500 Subject: [PATCH 34/37] parametrize tests --- .../tests/series/methods/test_interpolate.py | 124 ++++++++++-------- 1 file changed, 70 insertions(+), 54 deletions(-) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 152385bed37f6..8740a309eec13 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -458,64 +458,80 @@ def test_interp_limit_direction_raises(self, method, limit_direction, expected): with pytest.raises(ValueError, match=msg): s.interpolate(method=method, limit_direction=limit_direction) - def test_interp_limit_area_with_pad(self): - # Test for issue #26796 - s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - - expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan]) - result = s.interpolate(method="pad", limit_area="inside") - tm.assert_series_equal(result, 
expected) - - expected = Series( - [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan] - ) - result = s.interpolate(method="pad", limit_area="inside", limit=1) - tm.assert_series_equal(result, expected) - - expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) - result = s.interpolate(method="pad", limit_area="outside") - tm.assert_series_equal(result, expected) - - expected = Series( - [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] - ) - result = s.interpolate(method="pad", limit_area="outside", limit=1) - tm.assert_series_equal(result, expected) - - # Test for all NaNs - s = Series([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]) - expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]) - result = s.interpolate(method="pad", limit_area="outside", limit=1) - tm.assert_series_equal(result, expected) - - # Test for no NaNs - s = Series([1, 2, 3, 4]) - expected = Series([1, 2, 3, 4]) - result = s.interpolate(method="pad", limit_area="outside", limit=1) - tm.assert_series_equal(result, expected) - - def test_interp_limit_area_with_backfill(self): - # Test for issue #26796 - s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - - expected = Series([np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan]) - result = s.interpolate(method="bfill", limit_area="inside") - tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "data, expected_data, kwargs", + ( + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan], + {"method": "pad", "limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "pad", "limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 
7.0, 7.0, 7.0], + {"method": "pad", "limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan], + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), + ( + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), + ( + range(5), + range(5), + {"method": "pad", "limit_area": "outside", "limit": 1}, + ), + ), + ) + def test_interp_limit_area_with_pad(self, data, expected_data, kwargs): + # GH26796 - expected = Series( - [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan] - ) - result = s.interpolate(method="bfill", limit_area="inside", limit=1) + s = Series(data) + expected = Series(expected_data) + result = s.interpolate(**kwargs) tm.assert_series_equal(result, expected) - expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) - result = s.interpolate(method="bfill", limit_area="outside") - tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "data, expected_data, kwargs", + ( + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "inside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "inside", "limit": 1}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "outside"}, + ), + ( + [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], + [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], + {"method": "bfill", "limit_area": "outside", "limit": 1}, + ), + ), + ) + def 
test_interp_limit_area_with_backfill(self, data, expected_data, kwargs): + # GH26796 - expected = Series( - [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan] - ) - result = s.interpolate(method="bfill", limit_area="outside", limit=1) + s = Series(data) + expected = Series(expected_data) + result = s.interpolate(**kwargs) tm.assert_series_equal(result, expected) def test_interp_limit_direction(self): From 8a01725f3acde66fce448d199f68d3ae032593c6 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 1 Dec 2020 01:21:24 -0500 Subject: [PATCH 35/37] docstring --- pandas/core/missing.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 222ff344908e7..d07f38fb6832a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -533,6 +533,22 @@ def _interpolate_with_limit_area( ): """ Apply interpolation and limit_area logic to values along a to-be-specified axis. + + Parameters + ---------- + values: array-like + Input array. + method: str + Interpolation method. Could be "bfill" or "pad" + limit: int, optional + Index limit on interpolation. + limit_area: str + Limit area for interpolation. Can be "inside" or "outside" + + Returns + ------- + values: array-like + Interpolated array. """ invalid = isna(values) @@ -574,6 +590,24 @@ def interpolate_2d( """ Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. + + Parameters + ---------- + values: array-like + Input array. + method: str, default "pad" + Interpolation method. Could be "bfill" or "pad" + axis: 0 or 1 + Interpolation axis + limit: int, optional + Index limit on interpolation. + limit_area: str, optional + Limit area for interpolation. Can be "inside" or "outside" + + Returns + ------- + values: array-like + Interpolated array. 
""" if limit_area is not None: return np.apply_along_axis( From 69bae745a1d2bfded7646046bf6b22447d83e42b Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 1 Dec 2020 01:30:43 -0500 Subject: [PATCH 36/37] typing --- pandas/core/missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d07f38fb6832a..27804b4972082 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -529,8 +529,8 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat def _interpolate_with_limit_area( - values, method: str, limit: Optional[int], limit_area: Optional[str] -): + values: ArrayLike, method: str, limit: Optional[int], limit_area: Optional[str] +) -> ArrayLike: """ Apply interpolation and limit_area logic to values along a to-be-specified axis. From cc898a99510f7f00f04aa31195373a3561d05b3b Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 1 Dec 2020 12:29:38 -0500 Subject: [PATCH 37/37] refactor --- pandas/core/missing.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 27804b4972082..e374ba435a0bd 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -553,9 +553,7 @@ def _interpolate_with_limit_area( invalid = isna(values) - if not invalid.any(): - pass - elif not invalid.all(): + if not invalid.all(): first = find_valid_index(values, "first") last = find_valid_index(values, "last") @@ -571,12 +569,7 @@ def _interpolate_with_limit_area( invalid[:first] = invalid[last + 1 :] = False values[invalid] = np.nan - else: - values = interpolate_2d( - values, - method=method, - limit=limit, - ) + return values