From 7c230aa775e0b01d69012c809705f9d7bf98d308 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Tue, 5 Nov 2019 23:40:37 +0100 Subject: [PATCH 01/22] add pad method to Variable and add corresponding test --- xarray/core/duck_array_ops.py | 2 ++ xarray/core/variable.py | 33 ++++++++++++++++++++++++++++++- xarray/tests/test_variable.py | 37 ++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d943788c434..9d85d1430f1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -454,3 +454,5 @@ def rolling_window(array, axis, window, center, fill_value): return dask_array_ops.rolling_window(array, axis, window, center, fill_value) else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) + +pad = _dask_or_eager_func("pad") diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 117ab85ae65..9796efa3d36 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -3,7 +3,7 @@ from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Dict, Hashable, Mapping, TypeVar, Union +from typing import Any, Dict, Hashable, Mapping, TypeVar, Union, Tuple import numpy as np import pandas as pd @@ -1099,6 +1099,37 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): result = result._shift_one_dim(dim, count, fill_value=fill_value) return result + + def pad(self, + pad_widths: Mapping[Hashable, Tuple[int, int]] = None, + mode: str = "constant", + **kwargs: Any): + + # pop optional arguments from kwargs, so pad_width_kwargs remain + opt_kwargs_names = ["stat_length", "constant_values", "end_values", "reflect_type"] + opt_kwargs = {name: kwargs.pop(name) for name in opt_kwargs_names if name in kwargs} + + # workaround for Dask's default value of stat_length + if mode in ["maximum", "mean", "median", 
"minimum"]: + opt_kwargs.setdefault("stat_length", tuple((n, n) for n in self.data.shape)) + + dtype = self.dtype + if mode == "constant": + if "constant_values" not in opt_kwargs: + dtype, opt_kwargs["constant_values"] = dtypes.maybe_promote(self.dtype) + + pad_widths = either_dict_or_kwargs(pad_widths, kwargs, "pad") + pads = [(0, 0) if d not in pad_widths else pad_widths[d] for d in self.dims] + + array = duck_array_ops.pad( + self.data.astype(dtype, copy=False), + pads, + mode=mode, + **opt_kwargs + ) + + return type(self)(self.dims, array) + def pad_with_fill_value( self, pad_widths=None, fill_value=dtypes.NA, **pad_widths_kwargs ): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 528027ed149..32c17830808 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2,6 +2,8 @@ from copy import copy, deepcopy from datetime import datetime, timedelta from textwrap import dedent +import itertools +from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -782,6 +784,30 @@ def test_pad(self): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) + # TODO "mean", "median" and "reflect" have issues + modes = ["edge", "linear_ramp", "maximum", "minimum", "symmetric", "wrap"] + xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] + np_args = [ + ((2, 1), (0, 0), (0, 0)), + ((0, 0), (0, 3), (0, 0)), + ((3, 1), (0, 0), (2, 0)), + ] + for (xr_arg, np_arg), mode in itertools.product(zip(xr_args, np_args), modes): + print(mode) + actual = v.pad(mode=mode, **xr_arg) + expected = np.pad( + np.array(v.data), + np_arg, + mode=mode, + ) + assert_array_equal(actual, expected) + assert isinstance(actual._data, type(v._data)) + assert type(actual._data) == type(v._data) + + def test_pad_constant_values(self): + data = np.arange(4 * 3 * 2).reshape(4, 3, 2) + v = self.cls(["x", "y", "z"], data) + xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 
0)}] np_args = [ ((2, 1), (0, 0), (0, 0)), @@ -789,7 +815,7 @@ def test_pad(self): ((3, 1), (0, 0), (2, 0)), ] for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(**xr_arg) + actual = v.pad(**xr_arg) expected = np.pad( np.array(v.data.astype(float)), np_arg, @@ -1848,6 +1874,11 @@ def test_getitem_with_mask_nd_indexer(self): self.cls(("x", "y"), [[0, -1], [-1, 2]]), ) + def test_pad(self): + import dask + if LooseVersion(dask.__version__) < "0.18.1": + pytest.skip("padding was added in Dask version 0.18.1 ") + super().test_pad() class TestIndexVariable(VariableSubclassobjects): cls = staticmethod(IndexVariable) @@ -1957,6 +1988,10 @@ def test_getitem_uint(self): def test_pad(self): super().test_rolling_window() + @pytest.mark.xfail + def test_pad_constant_values(self): + super().test_rolling_window() + @pytest.mark.xfail def test_rolling_window(self): super().test_rolling_window() From 59802343c5aec72c51bf14c668fa22052a4d1736 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Mon, 18 Nov 2019 23:54:09 +0100 Subject: [PATCH 02/22] move pad_with_fill value to dask_array_compat.py and make it default to dask.array.pad --- xarray/core/dask_array_compat.py | 56 ++++++++++++++++++++++++ xarray/core/duck_array_ops.py | 4 +- xarray/core/rolling.py | 2 +- xarray/core/variable.py | 74 +++----------------------------- xarray/tests/test_sparse.py | 2 +- xarray/tests/test_variable.py | 2 +- 6 files changed, 68 insertions(+), 72 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index c3dbdd27098..1167a964ed9 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -89,3 +89,59 @@ def meta_from_array(x, ndim=None, dtype=None): meta = meta.astype(dtype) return meta + + +# TODO figure out how Dask versioning works +# if LooseVersion(dask_version) >= LooseVersion("1.7.0"): +try: + pad = da.pad +except AttributeError: + def pad(array, pad_width, mode="constant", **kwargs): + """ + Return 
a new dask.DataArray wit padding. This functions implements a + constant padding for versions of Dask that do not implement this yet. + + Parameters + ---------- + array: Array to pad + + pad_width: List of the form [(before, after)] + Number of values padded to the edges of axis. + """ + if mode != "constant": + raise NotImplementedError() # TODO add error message + + try: + fill_value = kwargs["constant_values"] + dtype = array.dtype + except KeyError: + dtype, fill_value = dtypes.maybe_promote(array.dtype) + + for axis, pad in enumerate(pad_width): + before_shape = list(array.shape) + before_shape[axis] = pad[0] + before_chunks = list(array.chunks) + before_chunks[axis] = (pad[0],) + after_shape = list(array.shape) + after_shape[axis] = pad[1] + after_chunks = list(array.chunks) + after_chunks[axis] = (pad[1],) + + arrays = [] + if pad[0] > 0: + arrays.append( + da.full( + before_shape, fill_value, dtype=dtype, chunks=before_chunks + ) + ) + arrays.append(array) + if pad[1] > 0: + arrays.append( + da.full( + after_shape, fill_value, dtype=dtype, chunks=after_chunks + ) + ) + if len(arrays) > 1: + array = da.concatenate(arrays, axis=axis) + + return array diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 9d85d1430f1..6003dd432b6 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -17,8 +17,10 @@ try: import dask.array as dask_array + from . 
import dask_array_compat except ImportError: dask_array = None # type: ignore + dask_array_compat = None def _dask_or_eager_func( @@ -455,4 +457,4 @@ def rolling_window(array, axis, window, center, fill_value): else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) -pad = _dask_or_eager_func("pad") +pad = _dask_or_eager_func("pad", dask_module=dask_array_compat) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index f4e571a8efe..08e11d4cc73 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -330,7 +330,7 @@ def _bottleneck_reduce(self, func, **kwargs): else: shift = (-self.window // 2) + 1 valid = (slice(None),) * axis + (slice(-shift, None),) - padded = padded.pad_with_fill_value({self.dim: (0, -shift)}) + padded = padded.pad({self.dim: (0, -shift)}, mode="constant") if isinstance(padded.data, dask_array_type): raise AssertionError("should not be reachable") diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9796efa3d36..d8aee2c897b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1099,7 +1099,6 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): result = result._shift_one_dim(dim, count, fill_value=fill_value) return result - def pad(self, pad_widths: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", @@ -1113,10 +1112,11 @@ def pad(self, if mode in ["maximum", "mean", "median", "minimum"]: opt_kwargs.setdefault("stat_length", tuple((n, n) for n in self.data.shape)) - dtype = self.dtype - if mode == "constant": - if "constant_values" not in opt_kwargs: - dtype, opt_kwargs["constant_values"] = dtypes.maybe_promote(self.dtype) + + if mode == "constant" and "constant_values" not in opt_kwargs: + dtype, opt_kwargs["constant_values"] = dtypes.maybe_promote(self.dtype) + else: + dtype = self.dtype pad_widths = either_dict_or_kwargs(pad_widths, kwargs, "pad") pads = [(0, 0) if d not in pad_widths else pad_widths[d] for d in 
self.dims] @@ -1130,68 +1130,6 @@ def pad(self, return type(self)(self.dims, array) - def pad_with_fill_value( - self, pad_widths=None, fill_value=dtypes.NA, **pad_widths_kwargs - ): - """ - Return a new Variable with paddings. - - Parameters - ---------- - pad_width: Mapping of the form {dim: (before, after)} - Number of values padded to the edges of each dimension. - **pad_widths_kwargs: - Keyword argument for pad_widths - """ - pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") - - if fill_value is dtypes.NA: - dtype, fill_value = dtypes.maybe_promote(self.dtype) - else: - dtype = self.dtype - - if isinstance(self.data, dask_array_type): - array = self.data - - # Dask does not yet support pad. We manually implement it. - # https://github.com/dask/dask/issues/1926 - for d, pad in pad_widths.items(): - axis = self.get_axis_num(d) - before_shape = list(array.shape) - before_shape[axis] = pad[0] - before_chunks = list(array.chunks) - before_chunks[axis] = (pad[0],) - after_shape = list(array.shape) - after_shape[axis] = pad[1] - after_chunks = list(array.chunks) - after_chunks[axis] = (pad[1],) - - arrays = [] - if pad[0] > 0: - arrays.append( - da.full( - before_shape, fill_value, dtype=dtype, chunks=before_chunks - ) - ) - arrays.append(array) - if pad[1] > 0: - arrays.append( - da.full( - after_shape, fill_value, dtype=dtype, chunks=after_chunks - ) - ) - if len(arrays) > 1: - array = da.concatenate(arrays, axis=axis) - else: - pads = [(0, 0) if d not in pad_widths else pad_widths[d] for d in self.dims] - array = np.pad( - self.data.astype(dtype, copy=False), - pads, - mode="constant", - constant_values=fill_value, - ) - return type(self)(self.dims, array) - def _roll_one_dim(self, dim, count): axis = self.get_axis_num(dim) @@ -1887,7 +1825,7 @@ def _coarsen_reshape(self, windows, boundary, side): pad_widths = {d: (0, pad)} else: pad_widths = {d: (pad, 0)} - variable = variable.pad_with_fill_value(pad_widths) + variable = 
variable.pad(pad_widths, mode="constant") else: raise TypeError( "{} is invalid for boundary. Valid option is 'exact', " diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 8e2d4b8e064..33e6947b95f 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -175,7 +175,7 @@ def test_variable_property(prop): marks=xfail(reason="mixed sparse-dense operation"), ), param( - do("pad_with_fill_value", pad_widths={"x": (1, 1)}, fill_value=5), + do("pad", mode="constant", pad_widths={"x": (1, 1)}, fill_value=5), True, marks=xfail(reason="Missing implementation for np.pad"), ), diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 32c17830808..03352cc1a8f 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -829,7 +829,7 @@ def test_pad_constant_values(self): data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(fill_value=False, **xr_arg) + actual = v.pad(mode="constant", constant_values=False, **xr_arg) expected = np.pad( np.array(v.data), np_arg, mode="constant", constant_values=False ) From b6a979b8df560c2804ca862b25d581d9e22a14b1 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Thu, 21 Nov 2019 00:19:50 +0100 Subject: [PATCH 03/22] add pad method to dataarray --- xarray/core/dataarray.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b61f83bcb1c..7bdbbd0cfa8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3110,6 +3110,38 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def pad( + self, + pad_widths: Mapping[Hashable, Tuple[int,int]] = None, + mode: str = "constant", + pad_options: dict = {}, + **pad_widths_kwargs: Any, + ) -> "DataArray": + """ + + """ + pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, 
"pad") + + variable = self.variable.pad( + pad_widths=pad_widths, mode=mode, **pad_options + ) + + if mode in ("edge", "reflect", "symmetric", "wrap"): + coord_pad_mode = mode + coord_pad_options = pad_options + else: + coord_pad_mode = "constant" + coord_pad_options = {} + + coords = {} + for name, dim in self.coords.items(): + if name in pad_widths: + coords[name] = dim.variable.pad({name: pad_widths[name]}, mode=coord_pad_mode, **coord_pad_options) + else: + coords[name] = as_variable(dim, name=name) + + return self._replace(variable=variable, coords=coords) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) From 80abc3aefa7e754181cad3767583c85e9e6fdae9 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Thu, 28 Nov 2019 21:11:18 +0100 Subject: [PATCH 04/22] add docstrings for variable.pad and dataarray.pad --- xarray/core/dask_array_compat.py | 6 +- xarray/core/dataarray.py | 104 ++++++++++++++++++++++++++++++- xarray/core/variable.py | 20 +++++- 3 files changed, 126 insertions(+), 4 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 1167a964ed9..dcad6542a97 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -109,7 +109,11 @@ def pad(array, pad_width, mode="constant", **kwargs): Number of values padded to the edges of axis. """ if mode != "constant": - raise NotImplementedError() # TODO add error message + raise NotImplementedError( + "Pad is not yet implemented for your current version of Dask. " + "Please update your version of Dask or use the " + "mode=`constant`, that is added by xarray." 
+ ) try: fill_value = kwargs["constant_values"] diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7bdbbd0cfa8..b1fdacc552a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3114,11 +3114,111 @@ def pad( self, pad_widths: Mapping[Hashable, Tuple[int,int]] = None, mode: str = "constant", - pad_options: dict = {}, + pad_options: Dict[str, Any] = {}, **pad_widths_kwargs: Any, ) -> "DataArray": - """ + """Pad this array along one or more dimensions. + + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), + coordinates will be padded with the same mode, otherwise coordinates + are padded using the "constant" mode with fill_value dtypes.NA. + + Parameters + ---------- + pad_widths : Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + mode : str (taken from numpy docs) + One of the following string values or a user supplied function. + 'constant' (default) + Pads with a constant value. + 'edge' + Pads with the edge values of array. + 'linear_ramp' + Pads with the linear ramp between end_value and the + array edge value. + 'maximum' + Pads with the maximum value of all or part of the + vector along each axis. + 'mean' + Pads with the mean value of all or part of the + vector along each axis. + 'median' + Pads with the median value of all or part of the + vector along each axis. + 'minimum' + Pads with the minimum value of all or part of the + vector along each axis. + 'reflect' + Pads with the reflection of the vector mirrored on + the first and last values of the vector along each + axis. + 'symmetric' + Pads with the reflection of the vector mirrored + along the edge of the array. + 'wrap' + Pads with the wrap of the vector along the axis. + The first values are used to pad the end and the + end values are used to pad the beginning. 
+ pad_options : additional keyword arguments that are passed to pad function + stat_length : sequence or int, optional + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + ((before_1, after_1), ... (before_N, after_N)) unique statistic + lengths for each axis. + ((before, after),) yields same before and after statistic lengths + for each axis. + (stat_length,) or int is a shortcut for before = after = statistic + length for all axes. + Default is ``None``, to use the entire axis. + constant_values : sequence or scalar, optional + Used in 'constant'. The values to set the padded values for each + axis. + ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants + for each axis. + ``((before, after),)`` yields same before and after constants for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + end_values : sequence or scalar, optional + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + ``((before_1, after_1), ... (before_N, after_N))`` unique end values + for each axis. + ``((before, after),)`` yields same before and after end values for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + + **pad_widths_kwargs: + The keyword arguments form of ``pad_widths``. + One of pad_widths or pad_widths_kwarg must be provided. + Returns + ------- + padded : DataArray + DataArray with the padded coordinates and data. 
+ + See also + -------- + shift + roll + + Examples + -------- + + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) + >>> arr.pad(x=(1,2), pad_options={"constant_values" : 0}) + + array([0, 5, 6, 7, 0, 0]) + Coordinates: + * x (x) float64 nan 0.0 1.0 2.0 nan nan """ pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") diff --git a/xarray/core/variable.py b/xarray/core/variable.py index d8aee2c897b..2763ae11a3b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1103,6 +1103,25 @@ def pad(self, pad_widths: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", **kwargs: Any): + """ + Return a new Variable with padded data. + + Parameters + ---------- + pad_widths: Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + mode: (str) + See numpy / Dask docs + **kwargs: + A combination of the optional arguments for np.pad/dask.pad + and the keyword arguments form of ``pad_widths``. + One of pad_widths or pad_widths_kwarg must be provided. + + Returns + ------- + padded : Variable + Variable with the same dimensions and attributes but padded data. 
+ """ # pop optional arguments from kwargs, so pad_width_kwargs remain opt_kwargs_names = ["stat_length", "constant_values", "end_values", "reflect_type"] @@ -1112,7 +1131,6 @@ def pad(self, if mode in ["maximum", "mean", "median", "minimum"]: opt_kwargs.setdefault("stat_length", tuple((n, n) for n in self.data.shape)) - if mode == "constant" and "constant_values" not in opt_kwargs: dtype, opt_kwargs["constant_values"] = dtypes.maybe_promote(self.dtype) else: From ed3d88e0d003c8b1e74a1634630274b2d2eb94d3 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Tue, 3 Dec 2019 22:51:56 +0100 Subject: [PATCH 05/22] add tests for DataArray.pad --- xarray/tests/test_dataarray.py | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5114d13b0dc..20199adc345 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4053,6 +4053,60 @@ def test_rank(self): y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",)) assert_equal(y.rank("z", pct=True), y) + def test_pad_constant(self): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad(dim_0=(1, 3)) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5).astype(np.float32), + pad_width=((1, 3), (0, 0), (0, 0)), + constant_values=np.nan, + ) + ) + assert_identical(actual, expected) + + @pytest.mark.parametrize("mode", ("minimum", "maximum", "mean", "median")) + @pytest.mark.parametrize("stat_length", (None, 3, (1, 3))) + def test_pad_stat_length(self, mode, stat_length): + + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad( + dim_0=(1, 3), + dim_2=(2, 2), + mode=mode, + pad_options=dict(stat_length=stat_length), + ) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + pad_width=((1, 3), (0, 0), (2, 2)), + mode=mode, + stat_length=stat_length, + ) + ) + assert_identical(actual, expected) + + @pytest.mark.parametrize("mode", 
("reflect", "symmetric")) + @pytest.mark.parametrize("reflect_type", (None, "even", "odd")) + def test_pad_reflect(self, mode, reflect_type): + + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad( + dim_0=(1, 3), + dim_2=(2, 2), + mode=mode, + pad_options=dict(reflect_type=reflect_type), + ) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + pad_width=((1, 3), (0, 0), (2, 2)), + mode=mode, + reflect_type=reflect_type, + ) + ) + assert_identical(actual, expected) + @pytest.fixture(params=[1]) def da(request): From d4e484dd3479d84fb44c1f5c0be23bc8b541bcec Mon Sep 17 00:00:00 2001 From: mark-boer Date: Wed, 4 Dec 2019 21:56:59 +0100 Subject: [PATCH 06/22] improve pad method signature and support dictionaries as pad_options instead of list of tuples --- xarray/core/dataarray.py | 102 ++++++++++++++++++++------------- xarray/core/variable.py | 72 ++++++++++++++++------- xarray/tests/test_dataarray.py | 12 +--- xarray/tests/test_variable.py | 1 - 4 files changed, 115 insertions(+), 72 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b1fdacc552a..ce0f719eae0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3112,9 +3112,18 @@ def map_blocks( def pad( self, - pad_widths: Mapping[Hashable, Tuple[int,int]] = None, + pad_widths: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", - pad_options: Dict[str, Any] = {}, + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, **pad_widths_kwargs: Any, ) -> "DataArray": """Pad this array along one or more dimensions. @@ -3159,42 +3168,41 @@ def pad( Pads with the wrap of the vector along the axis. 
The first values are used to pad the end and the end values are used to pad the beginning. - pad_options : additional keyword arguments that are passed to pad function - stat_length : sequence or int, optional - Used in 'maximum', 'mean', 'median', and 'minimum'. Number of - values at edge of each axis used to calculate the statistic value. - ((before_1, after_1), ... (before_N, after_N)) unique statistic - lengths for each axis. - ((before, after),) yields same before and after statistic lengths - for each axis. - (stat_length,) or int is a shortcut for before = after = statistic - length for all axes. - Default is ``None``, to use the entire axis. - constant_values : sequence or scalar, optional - Used in 'constant'. The values to set the padded values for each - axis. - ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants - for each axis. - ``((before, after),)`` yields same before and after constants for each - axis. - ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for - all axes. - Default is 0. - end_values : sequence or scalar, optional - Used in 'linear_ramp'. The values used for the ending value of the - linear_ramp and that will form the edge of the padded array. - ``((before_1, after_1), ... (before_N, after_N))`` unique end values - for each axis. - ``((before, after),)`` yields same before and after end values for each - axis. - ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for - all axes. - Default is 0. - reflect_type : {'even', 'odd'}, optional - Used in 'reflect', and 'symmetric'. The 'even' style is the - default with an unaltered reflection around the edge value. For - the 'odd' style, the extended part of the array is created by - subtracting the reflected values from two times the edge value. + stat_length : sequence or int, optional + Used in 'maximum', 'mean', 'median', and 'minimum'. 
Number of + values at edge of each axis used to calculate the statistic value. + ((before_1, after_1), ... (before_N, after_N)) unique statistic + lengths for each axis. + ((before, after),) yields same before and after statistic lengths + for each axis. + (stat_length,) or int is a shortcut for before = after = statistic + length for all axes. + Default is ``None``, to use the entire axis. + constant_values : sequence or scalar, optional + Used in 'constant'. The values to set the padded values for each + axis. + ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants + for each axis. + ``((before, after),)`` yields same before and after constants for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + end_values : sequence or scalar, optional + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + ``((before_1, after_1), ... (before_N, after_N))`` unique end values + for each axis. + ``((before, after),)`` yields same before and after end values for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. **pad_widths_kwargs: The keyword arguments form of ``pad_widths``. 
@@ -3223,12 +3231,22 @@ def pad( pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") variable = self.variable.pad( - pad_widths=pad_widths, mode=mode, **pad_options + pad_widths=pad_widths, + mode=mode, + stat_length=stat_length, + constant_values=constant_values, + end_values=end_values, + reflect_type=reflect_type, ) if mode in ("edge", "reflect", "symmetric", "wrap"): coord_pad_mode = mode - coord_pad_options = pad_options + coord_pad_options = { + "stat_length": stat_length, + "constant_values": constant_values, + "end_values": end_values, + "reflect_type": reflect_type, + } else: coord_pad_mode = "constant" coord_pad_options = {} @@ -3236,7 +3254,9 @@ def pad( coords = {} for name, dim in self.coords.items(): if name in pad_widths: - coords[name] = dim.variable.pad({name: pad_widths[name]}, mode=coord_pad_mode, **coord_pad_options) + coords[name] = dim.variable.pad( + {name: pad_widths[name]}, mode=coord_pad_mode, **coord_pad_options + ) else: coords[name] = as_variable(dim, name=name) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2763ae11a3b..beb2bf4d7a0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1099,10 +1099,22 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): result = result._shift_one_dim(dim, count, fill_value=fill_value) return result - def pad(self, + def pad( + self, pad_widths: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", - **kwargs: Any): + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, + **pad_widths_kwargs: Any, + ): """ Return a new Variable with padded data. @@ -1112,9 +1124,7 @@ def pad(self, Number of values padded along each dimension. 
mode: (str) See numpy / Dask docs - **kwargs: - A combination of the optional arguments for np.pad/dask.pad - and the keyword arguments form of ``pad_widths``. + **pad_widths_kwarg: One of pad_widths or pad_widths_kwarg must be provided. Returns @@ -1122,28 +1132,50 @@ def pad(self, padded : Variable Variable with the same dimensions and attributes but padded data. """ + pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") - # pop optional arguments from kwargs, so pad_width_kwargs remain - opt_kwargs_names = ["stat_length", "constant_values", "end_values", "reflect_type"] - opt_kwargs = {name: kwargs.pop(name) for name in opt_kwargs_names if name in kwargs} - - # workaround for Dask's default value of stat_length - if mode in ["maximum", "mean", "median", "minimum"]: - opt_kwargs.setdefault("stat_length", tuple((n, n) for n in self.data.shape)) - - if mode == "constant" and "constant_values" not in opt_kwargs: - dtype, opt_kwargs["constant_values"] = dtypes.maybe_promote(self.dtype) + # change default behaviour of pad with mode constant + if mode == "constant" and constant_values is None: + dtype, constant_values = dtypes.maybe_promote(self.dtype) else: dtype = self.dtype - pad_widths = either_dict_or_kwargs(pad_widths, kwargs, "pad") + # create pad_options_kwargs, numpy requires only relevant kwargs to be nonempty + if isinstance(stat_length, dict): + stat_length = [ + (n, n) if d not in stat_length else stat_length[d] + for d, n in zip(self.dims, self.data.shape) + ] + if isinstance(constant_values, dict): + constant_values = [ + (0, 0) if d not in constant_values else constant_values[d] + for d, n in zip(self.dims, self.data.shape) + ] + if isinstance(end_values, dict): + end_values = [ + (0, 0) if d not in end_values else end_values[d] + for d, n in zip(self.dims, self.data.shape) + ] + + # workaround for bug in Dask's default value of stat_length https://github.com/dask/dask/issues/5303 + if stat_length is None and mode in ["maximum", 
"mean", "median", "minimum"]: + stat_length = [(n, n) for n in self.data.shape] + pads = [(0, 0) if d not in pad_widths else pad_widths[d] for d in self.dims] + # numpy/dask work with optional kwargs + pad_option_kwargs = {} + if stat_length is not None: + pad_option_kwargs["stat_length"] = stat_length + if constant_values is not None: + pad_option_kwargs["constant_values"] = constant_values + if end_values is not None: + pad_option_kwargs["end_values"] = end_values + if reflect_type is not None: + pad_option_kwargs["reflect_type"] = reflect_type + array = duck_array_ops.pad( - self.data.astype(dtype, copy=False), - pads, - mode=mode, - **opt_kwargs + self.data.astype(dtype, copy=False), pads, mode=mode, **pad_option_kwargs ) return type(self)(self.dims, array) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 20199adc345..a7bdd2c6339 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4070,12 +4070,7 @@ def test_pad_constant(self): def test_pad_stat_length(self, mode, stat_length): ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) - actual = ar.pad( - dim_0=(1, 3), - dim_2=(2, 2), - mode=mode, - pad_options=dict(stat_length=stat_length), - ) + actual = ar.pad(dim_0=(1, 3), dim_2=(2, 2), mode=mode, stat_length=stat_length) expected = DataArray( np.pad( np.arange(3 * 4 * 5).reshape(3, 4, 5), @@ -4092,10 +4087,7 @@ def test_pad_reflect(self, mode, reflect_type): ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad( - dim_0=(1, 3), - dim_2=(2, 2), - mode=mode, - pad_options=dict(reflect_type=reflect_type), + dim_0=(1, 3), dim_2=(2, 2), mode=mode, reflect_type=reflect_type ) expected = DataArray( np.pad( diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 03352cc1a8f..5cfb67bc641 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -796,7 +796,6 @@ def test_pad(self): print(mode) actual = v.pad(mode=mode, **xr_arg) expected = 
np.pad( - np.array(v.data), np_arg, mode=mode, ) From 65d74952ae8f4e90b8f8092af746ad0a307a85fd Mon Sep 17 00:00:00 2001 From: mark-boer Date: Mon, 9 Dec 2019 00:03:59 +0100 Subject: [PATCH 07/22] fix linting errors and remove typo from tests --- xarray/core/dask_array_compat.py | 11 +++++------ xarray/core/dataarray.py | 6 +++--- xarray/core/duck_array_ops.py | 3 ++- xarray/core/variable.py | 6 +++--- xarray/tests/test_dataarray.py | 18 ++++++++++-------- xarray/tests/test_variable.py | 13 +++++-------- 6 files changed, 28 insertions(+), 29 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index dcad6542a97..d9c8a672084 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -4,6 +4,8 @@ import numpy as np from dask import __version__ as dask_version +from . import dtypes + if LooseVersion(dask_version) >= LooseVersion("2.0.0"): meta_from_array = da.utils.meta_from_array else: @@ -96,6 +98,7 @@ def meta_from_array(x, ndim=None, dtype=None): try: pad = da.pad except AttributeError: + def pad(array, pad_width, mode="constant", **kwargs): """ Return a new dask.DataArray wit padding. 
This functions implements a @@ -134,16 +137,12 @@ def pad(array, pad_width, mode="constant", **kwargs): arrays = [] if pad[0] > 0: arrays.append( - da.full( - before_shape, fill_value, dtype=dtype, chunks=before_chunks - ) + da.full(before_shape, fill_value, dtype=dtype, chunks=before_chunks) ) arrays.append(array) if pad[1] > 0: arrays.append( - da.full( - after_shape, fill_value, dtype=dtype, chunks=after_chunks - ) + da.full(after_shape, fill_value, dtype=dtype, chunks=after_chunks) ) if len(arrays) > 1: array = da.concatenate(arrays, axis=axis) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ce0f719eae0..d30c40b9184 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3115,13 +3115,13 @@ def pad( pad_widths: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", stat_length: Union[ - int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, constant_values: Union[ - int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, end_values: Union[ - int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, reflect_type: str = None, **pad_widths_kwargs: Any, diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 6003dd432b6..c8ad5a5561f 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -20,7 +20,7 @@ from . 
import dask_array_compat except ImportError: dask_array = None # type: ignore - dask_array_compat = None + dask_array_compat = None # type: ignore def _dask_or_eager_func( @@ -457,4 +457,5 @@ def rolling_window(array, axis, window, center, fill_value): else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) + pad = _dask_or_eager_func("pad", dask_module=dask_array_compat) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index beb2bf4d7a0..1a5502fd92b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1104,13 +1104,13 @@ def pad( pad_widths: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", stat_length: Union[ - int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, constant_values: Union[ - int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, end_values: Union[ - int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, reflect_type: str = None, **pad_widths_kwargs: Any, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a7bdd2c6339..e425f5eb92a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4059,6 +4059,7 @@ def test_pad_constant(self): expected = DataArray( np.pad( np.arange(3 * 4 * 5).reshape(3, 4, 5).astype(np.float32), + mode="constant", pad_width=((1, 3), (0, 0), (0, 0)), constant_values=np.nan, ) @@ -4089,14 +4090,15 @@ def test_pad_reflect(self, mode, reflect_type): actual = ar.pad( dim_0=(1, 3), dim_2=(2, 2), mode=mode, reflect_type=reflect_type ) - expected = DataArray( - np.pad( - np.arange(3 * 4 * 5).reshape(3, 4, 5), - pad_width=((1, 3), (0, 0), (2, 2)), - mode=mode, - reflect_type=reflect_type, - ) - ) + np_kwargs = { + "array": np.arange(3 * 4 * 5).reshape(3, 4, 5), + 
"pad_width": ((1, 3), (0, 0), (2, 2)), + "mode": mode, + } + # numpy does not support reflect_type=None + if reflect_type is not None: + np_kwargs["reflect_type"] = reflect_type + expected = DataArray(np.pad(**np_kwargs)) assert_identical(actual, expected) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 5cfb67bc641..73996f57add 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -793,15 +793,10 @@ def test_pad(self): ((3, 1), (0, 0), (2, 0)), ] for (xr_arg, np_arg), mode in itertools.product(zip(xr_args, np_args), modes): - print(mode) actual = v.pad(mode=mode, **xr_arg) - expected = np.pad( - np_arg, - mode=mode, - ) + expected = np.pad(data, np_arg, mode=mode,) assert_array_equal(actual, expected) assert isinstance(actual._data, type(v._data)) - assert type(actual._data) == type(v._data) def test_pad_constant_values(self): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) @@ -1875,10 +1870,12 @@ def test_getitem_with_mask_nd_indexer(self): def test_pad(self): import dask + if LooseVersion(dask.__version__) < "0.18.1": pytest.skip("padding was added in Dask version 0.18.1 ") super().test_pad() + class TestIndexVariable(VariableSubclassobjects): cls = staticmethod(IndexVariable) @@ -1985,11 +1982,11 @@ def test_getitem_uint(self): @pytest.mark.xfail def test_pad(self): - super().test_rolling_window() + super().test_pad() @pytest.mark.xfail def test_pad_constant_values(self): - super().test_rolling_window() + super().test_pad_constant_values() @pytest.mark.xfail def test_rolling_window(self): From 0d7f1a76f38cdd2ee04d58de7563f496ef820af3 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Mon, 9 Dec 2019 00:41:55 +0100 Subject: [PATCH 08/22] implement suggested changes: pad_width => padwidths, use pytest.mark.parametrize in test_variable.test_pad --- xarray/core/dataarray.py | 23 +++--- xarray/core/variable.py | 22 +++--- xarray/tests/test_variable.py | 138 +++++++++++++++++++++------------- 3 files changed, 109 
insertions(+), 74 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d30c40b9184..2e379b4694e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3112,7 +3112,7 @@ def map_blocks( def pad( self, - pad_widths: Mapping[Hashable, Tuple[int, int]] = None, + pad_width: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", stat_length: Union[ None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -3124,7 +3124,7 @@ def pad( None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, reflect_type: str = None, - **pad_widths_kwargs: Any, + **pad_width_kwargs: Any, ) -> "DataArray": """Pad this array along one or more dimensions. @@ -3134,7 +3134,7 @@ def pad( Parameters ---------- - pad_widths : Mapping with the form of {dim: (pad_before, pad_after)} + pad_width : Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. mode : str (taken from numpy docs) One of the following string values or a user supplied function. @@ -3204,9 +3204,9 @@ def pad( the 'odd' style, the extended part of the array is created by subtracting the reflected values from two times the edge value. - **pad_widths_kwargs: - The keyword arguments form of ``pad_widths``. - One of pad_widths or pad_widths_kwarg must be provided. + **pad_width_kwargs: + The keyword arguments form of ``pad_width``. + One of pad_width or pad_width_kwarg must be provided. 
Returns ------- @@ -3215,8 +3215,7 @@ def pad( See also -------- - shift - roll + DataArray.shift, DataArray.roll, numpy.pad, dask.array.pad Examples -------- @@ -3228,10 +3227,10 @@ def pad( Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan """ - pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") + pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") variable = self.variable.pad( - pad_widths=pad_widths, + pad_width=pad_width, mode=mode, stat_length=stat_length, constant_values=constant_values, @@ -3253,9 +3252,9 @@ def pad( coords = {} for name, dim in self.coords.items(): - if name in pad_widths: + if name in pad_width: coords[name] = dim.variable.pad( - {name: pad_widths[name]}, mode=coord_pad_mode, **coord_pad_options + {name: pad_width[name]}, mode=coord_pad_mode, **coord_pad_options ) else: coords[name] = as_variable(dim, name=name) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1a5502fd92b..a2373a9aaa5 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -3,7 +3,7 @@ from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Dict, Hashable, Mapping, TypeVar, Union, Tuple +from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union import numpy as np import pandas as pd @@ -1101,7 +1101,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): def pad( self, - pad_widths: Mapping[Hashable, Tuple[int, int]] = None, + pad_width: Mapping[Hashable, Tuple[int, int]] = None, mode: str = "constant", stat_length: Union[ None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -1113,26 +1113,26 @@ def pad( None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, reflect_type: str = None, - **pad_widths_kwargs: Any, + **pad_width_kwargs: Any, ): """ Return a new Variable with padded data. 
Parameters ---------- - pad_widths: Mapping with the form of {dim: (pad_before, pad_after)} + pad_width: Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. mode: (str) See numpy / Dask docs - **pad_widths_kwarg: - One of pad_widths or pad_widths_kwarg must be provided. + **pad_width_kwarg: + One of pad_width or pad_width_kwarg must be provided. Returns ------- padded : Variable Variable with the same dimensions and attributes but padded data. """ - pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") + pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") # change default behaviour of pad with mode constant if mode == "constant" and constant_values is None: @@ -1161,7 +1161,7 @@ def pad( if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]: stat_length = [(n, n) for n in self.data.shape] - pads = [(0, 0) if d not in pad_widths else pad_widths[d] for d in self.dims] + pads = [(0, 0) if d not in pad_width else pad_width[d] for d in self.dims] # numpy/dask work with optional kwargs pad_option_kwargs = {} @@ -1872,10 +1872,10 @@ def _coarsen_reshape(self, windows, boundary, side): if pad < 0: pad += window if side[d] == "left": - pad_widths = {d: (0, pad)} + pad_width = {d: (0, pad)} else: - pad_widths = {d: (pad, 0)} - variable = variable.pad(pad_widths, mode="constant") + pad_width = {d: (pad, 0)} + variable = variable.pad(pad_width, mode="constant") else: raise TypeError( "{} is invalid for boundary. 
Valid option is 'exact', " diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 73996f57add..e0e1333c5aa 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2,8 +2,6 @@ from copy import copy, deepcopy from datetime import datetime, timedelta from textwrap import dedent -import itertools -from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -780,54 +778,69 @@ def test_getitem_error(self): with raises_regex(IndexError, "Dimensions of indexers mis"): v[:, ind] - def test_pad(self): + @pytest.mark.parametrize( + "mode", + [ + pytest.param("mean", marks=pytest.mark.xfail), + pytest.param("median", marks=pytest.mark.xfail), + pytest.param("reflect", marks=pytest.mark.xfail), + "edge", + "linear_ramp", + "maximum", + "minimum", + "symmetric", + "wrap", + ], + ) + @pytest.mark.parametrize( + "xr_arg, np_arg", + [ + [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], + [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + ], + ) + def test_pad(self, mode, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) - # TODO "mean", "median" and "reflect" have issues - modes = ["edge", "linear_ramp", "maximum", "minimum", "symmetric", "wrap"] - xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] - np_args = [ - ((2, 1), (0, 0), (0, 0)), - ((0, 0), (0, 3), (0, 0)), - ((3, 1), (0, 0), (2, 0)), - ] - for (xr_arg, np_arg), mode in itertools.product(zip(xr_args, np_args), modes): - actual = v.pad(mode=mode, **xr_arg) - expected = np.pad(data, np_arg, mode=mode,) - assert_array_equal(actual, expected) - assert isinstance(actual._data, type(v._data)) - - def test_pad_constant_values(self): + actual = v.pad(mode=mode, **xr_arg) + expected = np.pad(data, np_arg, mode=mode,) + + assert_array_equal(actual, expected) + assert isinstance(actual._data, type(v._data)) + + @pytest.mark.parametrize( + "xr_arg, 
np_arg", + [ + [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], + [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + ], + ) + def test_pad_constant_values(self, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) - xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] - np_args = [ - ((2, 1), (0, 0), (0, 0)), - ((0, 0), (0, 3), (0, 0)), - ((3, 1), (0, 0), (2, 0)), - ] - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad(**xr_arg) - expected = np.pad( - np.array(v.data.astype(float)), - np_arg, - mode="constant", - constant_values=np.nan, - ) - assert_array_equal(actual, expected) - assert isinstance(actual._data, type(v._data)) + actual = v.pad(**xr_arg) + expected = np.pad( + np.array(v.data.astype(float)), + np_arg, + mode="constant", + constant_values=np.nan, + ) + assert_array_equal(actual, expected) + assert isinstance(actual._data, type(v._data)) # for the boolean array, we pad False data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad(mode="constant", constant_values=False, **xr_arg) - expected = np.pad( - np.array(v.data), np_arg, mode="constant", constant_values=False - ) - assert_array_equal(actual, expected) + + actual = v.pad(mode="constant", constant_values=False, **xr_arg) + expected = np.pad( + np.array(v.data), np_arg, mode="constant", constant_values=False + ) + assert_array_equal(actual, expected) def test_rolling_window(self): # Just a working test. 
See test_nputils for the algorithm validation @@ -1868,13 +1881,6 @@ def test_getitem_with_mask_nd_indexer(self): self.cls(("x", "y"), [[0, -1], [-1, 2]]), ) - def test_pad(self): - import dask - - if LooseVersion(dask.__version__) < "0.18.1": - pytest.skip("padding was added in Dask version 0.18.1 ") - super().test_pad() - class TestIndexVariable(VariableSubclassobjects): cls = staticmethod(IndexVariable) @@ -1981,12 +1987,42 @@ def test_getitem_uint(self): super().test_getitem_fancy() @pytest.mark.xfail - def test_pad(self): - super().test_pad() + @pytest.mark.parametrize( + "mode", + [ + pytest.param("mean", marks=pytest.mark.xfail), + pytest.param("median", marks=pytest.mark.xfail), + pytest.param("reflect", marks=pytest.mark.xfail), + "edge", + "linear_ramp", + "maximum", + "minimum", + "symmetric", + "wrap", + ], + ) + @pytest.mark.parametrize( + "xr_arg, np_arg", + [ + [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], + [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + ], + ) + def test_pad(self, mode, xr_arg, np_arg): + super().test_pad(mode, xr_arg, np_arg) @pytest.mark.xfail - def test_pad_constant_values(self): - super().test_pad_constant_values() + @pytest.mark.parametrize( + "xr_arg, np_arg", + [ + [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], + [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + ], + ) + def test_pad_constant_values(self, xr_arg, np_arg): + super().test_pad_constant_values(xr_arg, np_arg) @pytest.mark.xfail def test_rolling_window(self): From 1ee295089934fc45a34f93ea4f89da3ad13d56e3 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Sat, 28 Dec 2019 12:22:51 +0100 Subject: [PATCH 09/22] move pad method to dataset --- xarray/core/dataarray.py | 29 +++----------------- xarray/core/dataset.py | 52 ++++++++++++++++++++++++++++++++++++ xarray/core/variable.py | 5 ++-- xarray/tests/test_dataset.py | 13 +++++++++ 4 files changed, 70 insertions(+), 29 
deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2e379b4694e..58dc96a5982 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3227,39 +3227,16 @@ def pad( Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan """ - pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") - - variable = self.variable.pad( + ds = self._to_temp_dataset().pad( pad_width=pad_width, mode=mode, stat_length=stat_length, constant_values=constant_values, end_values=end_values, reflect_type=reflect_type, + **pad_width_kwargs, ) - - if mode in ("edge", "reflect", "symmetric", "wrap"): - coord_pad_mode = mode - coord_pad_options = { - "stat_length": stat_length, - "constant_values": constant_values, - "end_values": end_values, - "reflect_type": reflect_type, - } - else: - coord_pad_mode = "constant" - coord_pad_options = {} - - coords = {} - for name, dim in self.coords.items(): - if name in pad_width: - coords[name] = dim.variable.pad( - {name: pad_width[name]}, mode=coord_pad_mode, **coord_pad_options - ) - else: - coords[name] = as_variable(dim, name=name) - - return self._replace(variable=variable, coords=coords) + return self._from_temp_dataset(ds) # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2b89051e84e..b679f297872 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5475,5 +5475,57 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def pad( + self, + pad_width: Mapping[Hashable, Tuple[int, int]] = None, + mode: str = "constant", + stat_length: Union[ + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = 
None, + reflect_type: str = None, + **pad_width_kwargs: Any, + ) -> "Dataset": + + pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") + + if mode in ("edge", "reflect", "symmetric", "wrap"): + coord_pad_mode = mode + coord_pad_options = { + "stat_length": stat_length, + "constant_values": constant_values, + "end_values": end_values, + "reflect_type": reflect_type, + } + else: + coord_pad_mode = "constant" + coord_pad_options = {} + + variables = {} + for name, var in self.variables.items(): + var_pad_width = {k: v for k, v in pad_width.items() if k in var.dims} + if not var_pad_width: + variables[name] = var + elif name in self.data_vars: + variables[name] = var.pad( + pad_width=var_pad_width, + mode=mode, + stat_length=stat_length, + constant_values=constant_values, + end_values=end_values, + reflect_type=reflect_type, + ) + else: + variables[name] = var.pad( + pad_width=var_pad_width, mode=coord_pad_mode, **coord_pad_options, + ) + + return self._replace_vars_and_dims(variables) + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a2373a9aaa5..33abd551c97 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1149,12 +1149,11 @@ def pad( if isinstance(constant_values, dict): constant_values = [ (0, 0) if d not in constant_values else constant_values[d] - for d, n in zip(self.dims, self.data.shape) + for d in self.dims ] if isinstance(end_values, dict): end_values = [ - (0, 0) if d not in end_values else end_values[d] - for d, n in zip(self.dims, self.data.shape) + (0, 0) if d not in end_values else end_values[d] for d in self.dims ] # workaround for bug in Dask's default value of stat_length https://github.com/dask/dask/issues/5303 diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index eab6040e17e..b053defd55d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5197,6 +5197,19 @@ def 
test_ipython_key_completion(self): ds.data_vars[item] # should not raise assert sorted(actual) == sorted(expected) + def test_pad(self): + ds = create_test_data(seed=1) + padded = ds.pad(dim2=(1, 1), constant_values=42) + + assert padded["dim2"].shape == (11,) + assert padded["var1"].shape == (8, 11) + assert padded["var2"].shape == (8, 11) + assert padded["var3"].shape == (10, 8) + assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} + + np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) + np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) + # Py.test tests From 11023c3310ad8867358d5023f02387255a1b4435 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Sun, 29 Dec 2019 12:30:13 +0100 Subject: [PATCH 10/22] add helper function to variable.pad and fix some mypy errors --- xarray/core/dataset.py | 2 +- xarray/core/variable.py | 27 ++++++++++++--------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b679f297872..2a6496ecc3a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5521,7 +5521,7 @@ def pad( reflect_type=reflect_type, ) else: - variables[name] = var.pad( + variables[name] = var.pad( # type: ignore pad_width=var_pad_width, mode=coord_pad_mode, **coord_pad_options, ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 33abd551c97..64e7b013982 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1099,6 +1099,11 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): result = result._shift_one_dim(dim, count, fill_value=fill_value) return result + def _pad_options_dim_to_index(self, pad_option : Mapping[Hashable, Tuple[int, int]], fill_with_shape = False): + if fill_with_shape: + return [(n, n) if d not in pad_option else pad_option[d] for d, n in zip(self.dims, self.data.shape)] + return [(0, 0) if d not in pad_option else pad_option[d] for d in self.dims] + def pad( self, 
pad_width: Mapping[Hashable, Tuple[int, int]] = None, @@ -1142,27 +1147,19 @@ def pad( # create pad_options_kwargs, numpy requires only relevant kwargs to be nonempty if isinstance(stat_length, dict): - stat_length = [ - (n, n) if d not in stat_length else stat_length[d] - for d, n in zip(self.dims, self.data.shape) - ] + stat_length = self._pad_options_dim_to_index(stat_length, fill_with_shape=True) if isinstance(constant_values, dict): - constant_values = [ - (0, 0) if d not in constant_values else constant_values[d] - for d in self.dims - ] + constant_values = self._pad_options_dim_to_index(constant_values) if isinstance(end_values, dict): - end_values = [ - (0, 0) if d not in end_values else end_values[d] for d in self.dims - ] + end_values = self._pad_options_dim_to_index(end_values) # workaround for bug in Dask's default value of stat_length https://github.com/dask/dask/issues/5303 if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]: - stat_length = [(n, n) for n in self.data.shape] + stat_length = [(n, n) for n in self.data.shape] # type: ignore - pads = [(0, 0) if d not in pad_width else pad_width[d] for d in self.dims] + pads = self._pad_options_dim_to_index(pad_width) - # numpy/dask work with optional kwargs + # create pad_options_kwargs, numpy/dask requires only relevant kwargs to be nonempty pad_option_kwargs = {} if stat_length is not None: pad_option_kwargs["stat_length"] = stat_length @@ -1171,7 +1168,7 @@ def pad( if end_values is not None: pad_option_kwargs["end_values"] = end_values if reflect_type is not None: - pad_option_kwargs["reflect_type"] = reflect_type + pad_option_kwargs["reflect_type"] = reflect_type # type: ignore array = duck_array_ops.pad( self.data.astype(dtype, copy=False), pads, mode=mode, **pad_option_kwargs From 3aae4ba6bed84073f27245b8aae1dcc7c39ef356 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Tue, 31 Dec 2019 01:06:57 +0100 Subject: [PATCH 11/22] add some more tests for DataArray.pad and add 
docstrings to all pad methods --- xarray/core/dataarray.py | 27 +++++---- xarray/core/dataset.py | 100 +++++++++++++++++++++++++++++++++ xarray/core/variable.py | 31 ++++++++-- xarray/tests/test_dataarray.py | 63 ++++++++++++++++++++- 4 files changed, 200 insertions(+), 21 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 58dc96a5982..077547a3283 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3168,31 +3168,31 @@ def pad( Pads with the wrap of the vector along the axis. The first values are used to pad the end and the end values are used to pad the beginning. - stat_length : sequence or int, optional + stat_length : int, tuple or mapping of the form {dim: tuple} Used in 'maximum', 'mean', 'median', and 'minimum'. Number of values at edge of each axis used to calculate the statistic value. - ((before_1, after_1), ... (before_N, after_N)) unique statistic - lengths for each axis. + {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique + statistic lengths along each dimension. ((before, after),) yields same before and after statistic lengths - for each axis. + for each dimension. (stat_length,) or int is a shortcut for before = after = statistic length for all axes. Default is ``None``, to use the entire axis. - constant_values : sequence or scalar, optional + constant_values : scalar, tuple or mapping of the form {dim: tuple} Used in 'constant'. The values to set the padded values for each axis. - ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants - for each axis. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + pad constants along each dimension. ``((before, after),)`` yields same before and after constants for each - axis. + dimension. ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for - all axes. + all dimensions. Default is 0. 
- end_values : sequence or scalar, optional + end_values : scalar, tuple or mapping of the form {dim: tuple} Used in 'linear_ramp'. The values used for the ending value of the linear_ramp and that will form the edge of the padded array. - ``((before_1, after_1), ... (before_N, after_N))`` unique end values - for each axis. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + end values along each dimension. ``((before, after),)`` yields same before and after end values for each axis. ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for @@ -3203,7 +3203,6 @@ def pad( default with an unaltered reflection around the edge value. For the 'odd' style, the extended part of the array is created by subtracting the reflected values from two times the edge value. - **pad_width_kwargs: The keyword arguments form of ``pad_width``. One of pad_width or pad_width_kwarg must be provided. @@ -3221,7 +3220,7 @@ def pad( -------- >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), pad_options={"constant_values" : 0}) + >>> arr.pad(x=(1,2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2a6496ecc3a..32fffad5959 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5491,7 +5491,107 @@ def pad( reflect_type: str = None, **pad_width_kwargs: Any, ) -> "Dataset": + """Pad this dataset along one or more dimensions. + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), + coordinates will be padded with the same mode, otherwise coordinates + are padded using the "constant" mode with fill_value dtypes.NA. + + Parameters + ---------- + pad_width : Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + mode : str (taken from numpy docs) + One of the following string values or a user supplied function. + 'constant' (default) + Pads with a constant value. 
+ 'edge' + Pads with the edge values of array. + 'linear_ramp' + Pads with the linear ramp between end_value and the + array edge value. + 'maximum' + Pads with the maximum value of all or part of the + vector along each axis. + 'mean' + Pads with the mean value of all or part of the + vector along each axis. + 'median' + Pads with the median value of all or part of the + vector along each axis. + 'minimum' + Pads with the minimum value of all or part of the + vector along each axis. + 'reflect' + Pads with the reflection of the vector mirrored on + the first and last values of the vector along each + axis. + 'symmetric' + Pads with the reflection of the vector mirrored + along the edge of the array. + 'wrap' + Pads with the wrap of the vector along the axis. + The first values are used to pad the end and the + end values are used to pad the beginning. + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique + statistic lengths along each dimension. + ((before, after),) yields same before and after statistic lengths + for each dimension. + (stat_length,) or int is a shortcut for before = after = statistic + length for all axes. + Default is ``None``, to use the entire axis. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + pad constants along each dimension. + ``((before, after),)`` yields same before and after constants for each + dimension. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all dimensions. + Default is 0. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. 
The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + end values along each dimension. + ``((before, after),)`` yields same before and after end values for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + **pad_width_kwargs: + The keyword arguments form of ``pad_width``. + One of pad_width or pad_width_kwarg must be provided. + + Returns + ------- + padded : Dataset + Dataset with the padded coordinates and data. + + See also + -------- + DataArray.shift, DataArray.roll, numpy.pad, dask.array.pad + + Examples + -------- + + >>> ds = xr.Dataset({'foo': ('x', range(5))}) + >>> ds.pad(x=(1,2)) + + Dimensions: (x: 8) + Dimensions without coordinates: x + Data variables: + foo (x) float64 nan 0.0 1.0 2.0 3.0 4.0 nan nan + """ pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") if mode in ("edge", "reflect", "symmetric", "wrap"): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 64e7b013982..b0a64169d00 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1099,9 +1099,14 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): result = result._shift_one_dim(dim, count, fill_value=fill_value) return result - def _pad_options_dim_to_index(self, pad_option : Mapping[Hashable, Tuple[int, int]], fill_with_shape = False): + def _pad_options_dim_to_index( + self, pad_option: Mapping[Hashable, Tuple[int, int]], fill_with_shape=False + ): if fill_with_shape: - return [(n, n) if d not in pad_option else 
pad_option[d] for d, n in zip(self.dims, self.data.shape)] + return [ + (n, n) if d not in pad_option else pad_option[d] + for d, n in zip(self.dims, self.data.shape) + ] return [(0, 0) if d not in pad_option else pad_option[d] for d in self.dims] def pad( @@ -1129,6 +1134,20 @@ def pad( Number of values padded along each dimension. mode: (str) See numpy / Dask docs + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. **pad_width_kwarg: One of pad_width or pad_width_kwarg must be provided. 
@@ -1147,7 +1166,9 @@ def pad( # create pad_options_kwargs, numpy requires only relevant kwargs to be nonempty if isinstance(stat_length, dict): - stat_length = self._pad_options_dim_to_index(stat_length, fill_with_shape=True) + stat_length = self._pad_options_dim_to_index( + stat_length, fill_with_shape=True + ) if isinstance(constant_values, dict): constant_values = self._pad_options_dim_to_index(constant_values) if isinstance(end_values, dict): @@ -1155,7 +1176,7 @@ def pad( # workaround for bug in Dask's default value of stat_length https://github.com/dask/dask/issues/5303 if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]: - stat_length = [(n, n) for n in self.data.shape] # type: ignore + stat_length = [(n, n) for n in self.data.shape] # type: ignore pads = self._pad_options_dim_to_index(pad_width) @@ -1168,7 +1189,7 @@ def pad( if end_values is not None: pad_option_kwargs["end_values"] = end_values if reflect_type is not None: - pad_option_kwargs["reflect_type"] = reflect_type # type: ignore + pad_option_kwargs["reflect_type"] = reflect_type # type: ignore array = duck_array_ops.pad( self.data.astype(dtype, copy=False), pads, mode=mode, **pad_option_kwargs diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index e425f5eb92a..4463ee4916b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4064,14 +4064,47 @@ def test_pad_constant(self): constant_values=np.nan, ) ) + assert actual.shape == (7, 4, 5) + assert_identical(actual, expected) + + def test_pad_coords(self): + ar = DataArray( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + [("x", np.arange(3)), ("y", np.arange(4)), ("z", np.arange(5))], + ) + actual = ar.pad(x=(1, 3), constant_values=1) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + mode="constant", + pad_width=((1, 3), (0, 0), (0, 0)), + constant_values=1, + ), + [ + ( + "x", + np.pad( + np.arange(3).astype(np.float32), + mode="constant", + 
pad_width=(1, 3), + constant_values=np.nan, + ), + ), + ("y", np.arange(4)), + ("z", np.arange(5)), + ], + ) assert_identical(actual, expected) @pytest.mark.parametrize("mode", ("minimum", "maximum", "mean", "median")) - @pytest.mark.parametrize("stat_length", (None, 3, (1, 3))) + @pytest.mark.parametrize( + "stat_length", (None, 3, (1, 3), {"dim_0": (2, 1), "dim_2": (4, 2)}) + ) def test_pad_stat_length(self, mode, stat_length): - ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad(dim_0=(1, 3), dim_2=(2, 2), mode=mode, stat_length=stat_length) + if isinstance(stat_length, dict): + stat_length = (stat_length["dim_0"], (4, 4), stat_length["dim_2"]) expected = DataArray( np.pad( np.arange(3 * 4 * 5).reshape(3, 4, 5), @@ -4080,6 +4113,30 @@ def test_pad_stat_length(self, mode, stat_length): stat_length=stat_length, ) ) + assert actual.shape == (7, 4, 9) + assert_identical(actual, expected) + + @pytest.mark.parametrize( + "end_values", (None, 3, (3, 5), {"dim_0": (2, 1), "dim_2": (4, 2)}) + ) + def test_pad_linear_ramp(self, end_values): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad( + dim_0=(1, 3), dim_2=(2, 2), mode="linear_ramp", end_values=end_values + ) + if end_values is None: + end_values = 0 + elif isinstance(end_values, dict): + end_values = (end_values["dim_0"], (4, 4), end_values["dim_2"]) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + pad_width=((1, 3), (0, 0), (2, 2)), + mode="linear_ramp", + end_values=end_values, + ) + ) + assert actual.shape == (7, 4, 9) assert_identical(actual, expected) @pytest.mark.parametrize("mode", ("reflect", "symmetric")) @@ -4099,6 +4156,8 @@ def test_pad_reflect(self, mode, reflect_type): if reflect_type is not None: np_kwargs["reflect_type"] = reflect_type expected = DataArray(np.pad(**np_kwargs)) + + assert actual.shape == (7, 4, 9) assert_identical(actual, expected) From 314f007a07d4d050080a0ecb026b3454fa70404f Mon Sep 17 00:00:00 2001 From: 
mark-boer Date: Wed, 1 Jan 2020 14:08:02 +0100 Subject: [PATCH 12/22] add workaround for dask.pad mode=mean that converts integers to floats, and add an additional check if the shape of output --- xarray/core/dask_array_compat.py | 42 ++++++++++++++++++++++++++++---- xarray/core/dataset.py | 4 +-- xarray/core/duck_array_ops.py | 2 +- xarray/tests/test_variable.py | 4 +-- 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index ed402ea1bc6..51159e20fb0 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -99,11 +99,43 @@ def meta_from_array(x, ndim=None, dtype=None): return meta -# TODO figure out how Dask versioning works -# if LooseVersion(dask_version) >= LooseVersion("1.7.0"): -try: - pad = da.pad -except AttributeError: +def _validate_pad_output_shape(input_shape, pad_width, output_shape): + """ Dask.array.pad with mode='reflect' does not always return the correct output_shape. """ + isint = lambda i: isinstance(i, int) + + if isint(pad_width): + pass + elif len(pad_width) == 2 and all(map(isint, pad_width)): + pad_width = sum(pad_width) + elif ( + len(pad_width) == len(input_shape) + and all(map(lambda x: len(x) == 2, pad_width)) + and all((isint(i) for p in pad_width for i in p)) + ): + pad_width = np.sum(pad_width, axis=1) + else: + return # should be impossible + + if not np.array_equal(np.array(input_shape) + pad_width, output_shape): + raise RuntimeError( + "There seems to be something wrong with the shape of the output of dask.array.pad, " + "try upgrading Dask, use a different pad mode e.g. mode='constant' or first convert " + "your DataArray/Dataset to one backed by a numpy array by calling the `compute()` method." 
+ ) + + +if LooseVersion(dask_version) >= LooseVersion("0.18.1"): + + def pad(array, pad_width, mode="constant", **kwargs): + padded = da.pad(array, pad_width, mode=mode, **kwargs) + # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 + if mode == "mean" and issubclass(array.dtype.type, np.integer): + return da.round(padded).astype(array.dtype) + _validate_pad_output_shape(array.shape, pad_width, padded.shape) + return padded + + +else: def pad(array, pad_width, mode="constant", **kwargs): """ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 925ef1cb3a6..7b5f0dc8893 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5820,8 +5820,8 @@ def pad( reflect_type=reflect_type, ) else: - variables[name] = var.pad( # type: ignore - pad_width=var_pad_width, mode=coord_pad_mode, **coord_pad_options, + variables[name] = var.pad( + pad_width=var_pad_width, mode=coord_pad_mode, **coord_pad_options, # type: ignore ) return self._replace_vars_and_dims(variables) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 67a07d634c2..3d3510ee95c 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from . import dask_array_ops, dask_array_compat, dtypes, npcompat, nputils +from . 
import dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index f1145c1f4b0..6e8641329e0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -788,7 +788,7 @@ def test_getitem_error(self): @pytest.mark.parametrize( "mode", [ - pytest.param("mean", marks=pytest.mark.xfail), + "mean", pytest.param("median", marks=pytest.mark.xfail), pytest.param("reflect", marks=pytest.mark.xfail), "edge", @@ -2070,7 +2070,7 @@ def test_getitem_uint(self): @pytest.mark.parametrize( "mode", [ - pytest.param("mean", marks=pytest.mark.xfail), + "mean", pytest.param("median", marks=pytest.mark.xfail), pytest.param("reflect", marks=pytest.mark.xfail), "edge", From 75154783ad71e3184b2cbce122cb3a9b976987d7 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Sun, 26 Jan 2020 00:23:14 +0100 Subject: [PATCH 13/22] disable linear_ramp test and add pad to whats-new.rst and api.rst --- doc/api.rst | 2 ++ doc/whats-new.rst | 2 ++ xarray/tests/test_variable.py | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 9735eb0c708..407383bf367 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -220,6 +220,7 @@ Reshaping and reorganizing Dataset.to_stacked_array Dataset.shift Dataset.roll + Dataset.pad Dataset.sortby Dataset.broadcast_like @@ -399,6 +400,7 @@ Reshaping and reorganizing DataArray.to_unstacked_dataset DataArray.shift DataArray.roll + DataArray.pad DataArray.sortby DataArray.broadcast_like diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 00d1c50780e..69c7c0bae4f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Implement :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad`. (:issue:`2605`). + By `Mark Boer `_. - Implement :py:func:`median` and :py:func:`nanmedian` for dask arrays. 
This works by rechunking to a single chunk along all reduction axes. (:issue:`2999`). By `Deepak Cherian `_. diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 6e8641329e0..44a7ef40c90 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -792,7 +792,7 @@ def test_getitem_error(self): pytest.param("median", marks=pytest.mark.xfail), pytest.param("reflect", marks=pytest.mark.xfail), "edge", - "linear_ramp", + pytest.param("linear_ramp", marks=pytest.mark.xfail), "maximum", "minimum", "symmetric", From 855c39e5f2a5c6117ff8bbf71c799cef031f8301 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Sun, 26 Jan 2020 16:39:48 +0100 Subject: [PATCH 14/22] fix small merge issue in test_unit --- doc/api-hidden.rst | 2 -- xarray/tests/test_units.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index c117b0f4fc7..a73b776d128 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -379,7 +379,6 @@ Variable.min Variable.no_conflicts Variable.notnull - Variable.pad_with_fill_value Variable.prod Variable.quantile Variable.rank @@ -453,7 +452,6 @@ IndexVariable.min IndexVariable.no_conflicts IndexVariable.notnull - IndexVariable.pad_with_fill_value IndexVariable.prod IndexVariable.quantile IndexVariable.rank diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index d98e5e23516..8cfffb1a4d2 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2061,10 +2061,10 @@ def test_pad_with_fill_value(self, unit, error, dtype): fill_value = -100 * unit - func = method("pad_with_fill_value", x=(2, 3), y=(1, 4)) + func = method("pad", mode="constant", x=(2, 3), y=(1, 4)) if error is not None: with pytest.raises(error): - func(variable, fill_value=fill_value) + func(variable, constant_values=fill_value) return @@ -2072,11 +2072,11 @@ def test_pad_with_fill_value(self, unit, error, dtype): expected = attach_units( func( 
strip_units(variable), - fill_value=strip_units(convert_units(fill_value, units)), + constant_values=strip_units(convert_units(fill_value, units)), ), units, ) - actual = func(variable, fill_value=fill_value) + actual = func(variable, constant_values=fill_value) assert_units_equal(expected, actual) xr.testing.assert_identical(expected, actual) From d507d1d0aa01a507bd16eb3074841537e1b4be80 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Sun, 26 Jan 2020 22:42:58 +0100 Subject: [PATCH 15/22] fix DataArray.pad and Dataset.pad docstrings --- xarray/core/dataarray.py | 3 ++- xarray/core/dataset.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 93bb18ddc65..c03a9293307 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3257,7 +3257,8 @@ def pad( pad_width : Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. mode : str (taken from numpy docs) - One of the following string values or a user supplied function. + One of the following string values + 'constant' (default) Pads with a constant value. 'edge' diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index be1aa90c78f..20e7ea2bc6e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5713,7 +5713,8 @@ def pad( pad_width : Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. mode : str (taken from numpy docs) - One of the following string values or a user supplied function. + One of the following string values. + 'constant' (default) Pads with a constant value. 
'edge' @@ -5790,7 +5791,7 @@ def pad( See also -------- - DataArray.shift, DataArray.roll, numpy.pad, dask.array.pad + Dataset.shift, Dataset.roll, numpy.pad, dask.array.pad Examples -------- From 64ac8a2cca92c2b2359513248c5ce562ae381728 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Wed, 12 Feb 2020 22:45:06 +0100 Subject: [PATCH 16/22] implement suggested changes from code review: add option of integer pad_width, add a warning and exception to dask_array_compat.pad --- xarray/core/dask_array_compat.py | 87 ++++++++------------------------ xarray/core/dataarray.py | 3 +- xarray/core/dataset.py | 3 +- xarray/core/variable.py | 19 +++++-- xarray/tests/test_variable.py | 18 +++++-- 5 files changed, 53 insertions(+), 77 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 51159e20fb0..cb5618c128d 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,5 +1,6 @@ from distutils.version import LooseVersion from typing import Iterable +import warnings import numpy as np @@ -10,8 +11,6 @@ dask_version = "0.0.0" da = None -from . import dtypes - if LooseVersion(dask_version) >= LooseVersion("2.0.0"): meta_from_array = da.utils.meta_from_array else: @@ -100,7 +99,10 @@ def meta_from_array(x, ndim=None, dtype=None): def _validate_pad_output_shape(input_shape, pad_width, output_shape): - """ Dask.array.pad with mode='reflect' does not always return the correct output_shape. """ + """ Validates the output shape of dask.array.pad, raising a RuntimeError if they do not match. + In the current versions of dask (2.2/2.4), dask.array.pad with mode='reflect' sometimes returns + an invalid shape. 
+ """ isint = lambda i: isinstance(i, int) if isint(pad_width): @@ -114,78 +116,31 @@ def _validate_pad_output_shape(input_shape, pad_width, output_shape): ): pad_width = np.sum(pad_width, axis=1) else: - return # should be impossible + # unreachable: dask.array.pad should already have thrown an error + raise ValueError("Invalid value for `pad_width`") if not np.array_equal(np.array(input_shape) + pad_width, output_shape): raise RuntimeError( "There seems to be something wrong with the shape of the output of dask.array.pad, " "try upgrading Dask, use a different pad mode e.g. mode='constant' or first convert " "your DataArray/Dataset to one backed by a numpy array by calling the `compute()` method." + "See: https://github.com/dask/dask/issues/5303" ) -if LooseVersion(dask_version) >= LooseVersion("0.18.1"): - - def pad(array, pad_width, mode="constant", **kwargs): - padded = da.pad(array, pad_width, mode=mode, **kwargs) - # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 - if mode == "mean" and issubclass(array.dtype.type, np.integer): - return da.round(padded).astype(array.dtype) - _validate_pad_output_shape(array.shape, pad_width, padded.shape) - return padded - - -else: - - def pad(array, pad_width, mode="constant", **kwargs): - """ - Return a new dask.DataArray wit padding. This functions implements a - constant padding for versions of Dask that do not implement this yet. - - Parameters - ---------- - array: Array to pad - - pad_width: List of the form [(before, after)] - Number of values padded to the edges of axis. - """ - if mode != "constant": - raise NotImplementedError( - "Pad is not yet implemented for your current version of Dask. " - "Please update your version of Dask or use the " - "mode=`constant`, that is added by xarray." 
- ) - - try: - fill_value = kwargs["constant_values"] - dtype = array.dtype - except KeyError: - dtype, fill_value = dtypes.maybe_promote(array.dtype) - - for axis, pad in enumerate(pad_width): - before_shape = list(array.shape) - before_shape[axis] = pad[0] - before_chunks = list(array.chunks) - before_chunks[axis] = (pad[0],) - after_shape = list(array.shape) - after_shape[axis] = pad[1] - after_chunks = list(array.chunks) - after_chunks[axis] = (pad[1],) - - arrays = [] - if pad[0] > 0: - arrays.append( - da.full(before_shape, fill_value, dtype=dtype, chunks=before_chunks) - ) - arrays.append(array) - if pad[1] > 0: - arrays.append( - da.full(after_shape, fill_value, dtype=dtype, chunks=after_chunks) - ) - if len(arrays) > 1: - array = da.concatenate(arrays, axis=axis) - - return array +def pad(array, pad_width, mode="constant", **kwargs): + padded = da.pad(array, pad_width, mode=mode, **kwargs) + # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 + if mode == "mean" and issubclass(array.dtype.type, np.integer): + warnings.warn( + '`dask.array.pad(mode="mean")` converts integers to floats. xarray converts ' + "these floats back to integers, to keep the interface consistent. There is a chance that " + "this introduces rounding errors. If you wish to keep the values as floats, first change " + "the type to a float before calling `pad`." 
+ ) + return da.round(padded).astype(array.dtype) + _validate_pad_output_shape(array.shape, pad_width, padded.shape) + return padded if LooseVersion(dask_version) >= LooseVersion("2.8.1"): diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c03a9293307..c117ec5ee82 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3232,7 +3232,7 @@ def map_blocks( def pad( self, - pad_width: Mapping[Hashable, Tuple[int, int]] = None, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, mode: str = "constant", stat_length: Union[ None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -3256,6 +3256,7 @@ def pad( ---------- pad_width : Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad mode : str (taken from numpy docs) One of the following string values diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 20e7ea2bc6e..3d6ba35fa74 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5688,7 +5688,7 @@ def map_blocks( def pad( self, - pad_width: Mapping[Hashable, Tuple[int, int]] = None, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, mode: str = "constant", stat_length: Union[ None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -5712,6 +5712,7 @@ def pad( ---------- pad_width : Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad mode : str (taken from numpy docs) One of the following string values. 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9245445799e..0e88eb2bd91 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,6 +5,7 @@ from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion +import numbers from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union import numpy as np @@ -1154,7 +1155,9 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): return result def _pad_options_dim_to_index( - self, pad_option: Mapping[Hashable, Tuple[int, int]], fill_with_shape=False + self, + pad_option: Mapping[Hashable, Union[int, Tuple[int, int]]], + fill_with_shape=False, ): if fill_with_shape: return [ @@ -1165,7 +1168,7 @@ def _pad_options_dim_to_index( def pad( self, - pad_width: Mapping[Hashable, Tuple[int, int]] = None, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, mode: str = "constant", stat_length: Union[ None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -1186,6 +1189,7 @@ def pad( ---------- pad_width: Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. 
+ {dim: pad} is a shortcut for pad_before = pad_after = pad mode: (str) See numpy / Dask docs stat_length : int, tuple or mapping of the form {dim: tuple} @@ -1232,7 +1236,11 @@ def pad( if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]: stat_length = [(n, n) for n in self.data.shape] # type: ignore - pads = self._pad_options_dim_to_index(pad_width) + # change integer values to a tuple of two of those values and change pad_width to index + for k, v in pad_width.items(): + if isinstance(v, numbers.Number): + pad_width[k] = (v, v) + pad_width_by_index = self._pad_options_dim_to_index(pad_width) # create pad_options_kwargs, numpy/dask requires only relevant kwargs to be nonempty pad_option_kwargs = {} @@ -1246,7 +1254,10 @@ def pad( pad_option_kwargs["reflect_type"] = reflect_type # type: ignore array = duck_array_ops.pad( - self.data.astype(dtype, copy=False), pads, mode=mode, **pad_option_kwargs + self.data.astype(dtype, copy=False), + pad_width_by_index, + mode=mode, + **pad_option_kwargs, ) return type(self)(self.dims, array) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 44a7ef40c90..0e601a0865d 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -789,10 +789,15 @@ def test_getitem_error(self): "mode", [ "mean", - pytest.param("median", marks=pytest.mark.xfail), - pytest.param("reflect", marks=pytest.mark.xfail), + pytest.param( + "median", + marks=pytest.mark.xfail(reason="median is not implemented by Dask"), + ), + pytest.param( + "reflect", marks=pytest.mark.xfail(reason="dask.array.pad bug") + ), "edge", - pytest.param("linear_ramp", marks=pytest.mark.xfail), + "linear_ramp", "maximum", "minimum", "symmetric", @@ -803,8 +808,10 @@ def test_getitem_error(self): "xr_arg, np_arg", [ [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"x": 1}, ((1, 1), (0, 0), (0, 0))], [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + [{"x": (3, 
1), "z": 2}, ((3, 1), (0, 0), (2, 2))], ], ) def test_pad(self, mode, xr_arg, np_arg): @@ -2066,13 +2073,14 @@ def test_getitem_fancy(self): def test_getitem_uint(self): super().test_getitem_fancy() + # TODO would be nice if it was not necessary to repeat all the parameters @pytest.mark.xfail @pytest.mark.parametrize( "mode", [ "mean", - pytest.param("median", marks=pytest.mark.xfail), - pytest.param("reflect", marks=pytest.mark.xfail), + "median", + "reflect", "edge", "linear_ramp", "maximum", From 71e11bb0e11773b1a874c22382cf43dd721e6d11 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Thu, 13 Feb 2020 00:25:39 +0100 Subject: [PATCH 17/22] apply isort and set linear_ramp to xfail --- xarray/core/dask_array_compat.py | 2 +- xarray/core/nputils.py | 1 - xarray/core/variable.py | 2 +- xarray/tests/test_variable.py | 7 ++++++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index cb5618c128d..bf035dc2aa3 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,6 +1,6 @@ +import warnings from distutils.version import LooseVersion from typing import Iterable -import warnings import numpy as np diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index dba67174fc1..cf189e471cc 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd - from numpy.core.multiarray import normalize_axis_index try: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 0e88eb2bd91..78afef00e0f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,11 +1,11 @@ import copy import functools import itertools +import numbers import warnings from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -import numbers from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union import numpy as np diff --git 
a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 0e601a0865d..e41b6219ee8 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -797,7 +797,12 @@ def test_getitem_error(self): "reflect", marks=pytest.mark.xfail(reason="dask.array.pad bug") ), "edge", - "linear_ramp", + pytest.param( + "linear_ramp", + marks=pytest.mark.xfail( + reason="pint bug: https://github.com/hgrecco/pint/issues/1026" + ), + ), "maximum", "minimum", "symmetric", From 7060b07939d57ab0d3dc2ff51a164dc37bf54177 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 5 Mar 2020 17:00:45 +0530 Subject: [PATCH 18/22] Minor fixes. 1. Add warning category 2. Use variable for pad arguments when testing 3. Add example. --- doc/whats-new.rst | 2 +- xarray/core/dask_array_compat.py | 7 +++-- xarray/core/dataarray.py | 41 ++++++++++++++++++++++++-- xarray/core/dataset.py | 12 ++++++-- xarray/core/variable.py | 4 +-- xarray/tests/test_variable.py | 49 +++++++++----------------------- 6 files changed, 67 insertions(+), 48 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fb54e4b62c2..64557a7795d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,7 +40,7 @@ Breaking changes New Features ~~~~~~~~~~~~ -- Implement :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad`. (:issue:`2605`). +- Implement :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad`. (:issue:`2605`, :pull:`3596`). By `Mark Boer `_. - :py:meth:`DataArray.sel` and :py:meth:`Dataset.sel` now support :py:class:`pandas.CategoricalIndex`. (:issue:`3669`) By `Keisuke Fujii `_. 
diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index bf035dc2aa3..c8a0a8ba6f3 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -133,10 +133,11 @@ def pad(array, pad_width, mode="constant", **kwargs): # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 if mode == "mean" and issubclass(array.dtype.type, np.integer): warnings.warn( - '`dask.array.pad(mode="mean")` converts integers to floats. xarray converts ' - "these floats back to integers, to keep the interface consistent. There is a chance that " + 'dask.array.pad(mode="mean") converts integers to floats. xarray converts ' + "these floats back to integers to keep the interface consistent. There is a chance that " "this introduces rounding errors. If you wish to keep the values as floats, first change " - "the type to a float before calling `pad`." + "the dtype to a float before calling pad.", + UserWarning, ) return da.round(padded).astype(array.dtype) _validate_pad_output_shape(array.shape, pad_width, padded.shape) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c117ec5ee82..196dcb3918a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3257,8 +3257,8 @@ def pad( pad_width : Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad - mode : str (taken from numpy docs) - One of the following string values + mode : str + One of the following string values (taken from numpy docs) 'constant' (default) Pads with a constant value. @@ -3327,7 +3327,7 @@ def pad( subtracting the reflected values from two times the edge value. **pad_width_kwargs: The keyword arguments form of ``pad_width``. - One of pad_width or pad_width_kwarg must be provided. + One of ``pad_width`` or ``pad_width_kwargs`` must be provided. 
Returns ------- @@ -3338,6 +3338,12 @@ def pad( -------- DataArray.shift, DataArray.roll, numpy.pad, dask.array.pad + Notes + ----- + By default when ``mode="constant"`` and ``constant_values=None``, integer types will be + promoted to ``float`` and padded with ``np.nan``. To avoid type promotion + specify ``constant_values=np.nan`` + Examples -------- @@ -3347,6 +3353,35 @@ def pad( array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan + + >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], + dims=["x", "y"], + coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} + ) + >>> da.pad(x=1) + + array([[nan, nan, nan, nan], + [ 0., 1., 2., 3.], + [10., 11., 12., 13.], + [nan, nan, nan, nan]]) + Coordinates: + * x (x) float64 nan 0.0 1.0 nan + * y (y) int64 10 20 30 40 + z (x) float64 nan 100.0 200.0 nan + >>> da.pad(x=1, constant_values=np.nan) + + array([[-9223372036854775808, -9223372036854775808, -9223372036854775808, + -9223372036854775808], + [ 0, 1, 2, + 3], + [ 10, 11, 12, + 13], + [-9223372036854775808, -9223372036854775808, -9223372036854775808, + -9223372036854775808]]) + Coordinates: + * x (x) float64 nan 0.0 1.0 nan + * y (y) int64 10 20 30 40 + z (x) float64 nan 100.0 200.0 nan """ ds = self._to_temp_dataset().pad( pad_width=pad_width, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3d6ba35fa74..88514e81d24 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5713,8 +5713,8 @@ def pad( pad_width : Mapping with the form of {dim: (pad_before, pad_after)} Number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad - mode : str (taken from numpy docs) - One of the following string values. + mode : str + One of the following string values (taken from numpy docs). 'constant' (default) Pads with a constant value. @@ -5783,7 +5783,7 @@ def pad( subtracting the reflected values from two times the edge value. 
**pad_width_kwargs: The keyword arguments form of ``pad_width``. - One of pad_width or pad_width_kwarg must be provided. + One of ``pad_width`` or ``pad_width_kwargs`` must be provided. Returns ------- @@ -5794,6 +5794,12 @@ def pad( -------- Dataset.shift, Dataset.roll, numpy.pad, dask.array.pad + Notes + ----- + By default when ``mode="constant"`` and ``constant_values=None``, integer types will be + promoted to ``float`` and padded with ``np.nan``. To avoid type promotion + specify ``constant_values=np.nan`` + Examples -------- diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 78afef00e0f..64c0fc0ad05 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1206,8 +1206,8 @@ def pad( default with an unaltered reflection around the edge value. For the 'odd' style, the extended part of the array is created by subtracting the reflected values from two times the edge value. - **pad_width_kwarg: - One of pad_width or pad_width_kwarg must be provided. + **pad_width_kwargs: + One of pad_width or pad_width_kwargs must be provided. 
Returns ------- diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index e41b6219ee8..f03581ac119 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -38,6 +38,14 @@ source_ndarray, ) +_PAD_XR_NP_ARGS = [ + [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"x": 1}, ((1, 1), (0, 0), (0, 0))], + [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], + [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + [{"x": (3, 1), "z": 2}, ((3, 1), (0, 0), (2, 2))], +] + class VariableSubclassobjects: def test_properties(self): @@ -809,34 +817,18 @@ def test_getitem_error(self): "wrap", ], ) - @pytest.mark.parametrize( - "xr_arg, np_arg", - [ - [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], - [{"x": 1}, ((1, 1), (0, 0), (0, 0))], - [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], - [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], - [{"x": (3, 1), "z": 2}, ((3, 1), (0, 0), (2, 2))], - ], - ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) def test_pad(self, mode, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) actual = v.pad(mode=mode, **xr_arg) - expected = np.pad(data, np_arg, mode=mode,) + expected = np.pad(data, np_arg, mode=mode) assert_array_equal(actual, expected) assert isinstance(actual._data, type(v._data)) - @pytest.mark.parametrize( - "xr_arg, np_arg", - [ - [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], - [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], - [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], - ], - ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) def test_pad_constant_values(self, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) @@ -2078,7 +2070,6 @@ def test_getitem_fancy(self): def test_getitem_uint(self): super().test_getitem_fancy() - # TODO would be nice if it was not necessary to repeat all the parameters @pytest.mark.xfail @pytest.mark.parametrize( "mode", @@ -2094,26 +2085,12 @@ def 
test_getitem_uint(self): "wrap", ], ) - @pytest.mark.parametrize( - "xr_arg, np_arg", - [ - [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], - [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], - [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], - ], - ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) def test_pad(self, mode, xr_arg, np_arg): super().test_pad(mode, xr_arg, np_arg) @pytest.mark.xfail - @pytest.mark.parametrize( - "xr_arg, np_arg", - [ - [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], - [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], - [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], - ], - ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) def test_pad_constant_values(self, xr_arg, np_arg): super().test_pad_constant_values(xr_arg, np_arg) From 3e6f79286f45d9e0c3a07c00843c30947098d088 Mon Sep 17 00:00:00 2001 From: mark-boer Date: Sun, 8 Mar 2020 19:56:00 +0100 Subject: [PATCH 19/22] fix merge issue and make some minor changes as suggested in the code review --- xarray/core/dataarray.py | 8 ++++---- xarray/core/dataset.py | 8 ++++---- xarray/core/variable.py | 16 ++++++---------- xarray/tests/test_units.py | 4 ++-- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index efba394ed6a..d72ffa5b252 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3244,13 +3244,13 @@ def pad( pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, mode: str = "constant", stat_length: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, constant_values: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, end_values: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, reflect_type: str = None, 
**pad_width_kwargs: Any, @@ -3345,7 +3345,7 @@ def pad( See also -------- - DataArray.shift, DataArray.roll, numpy.pad, dask.array.pad + DataArray.shift, DataArray.roll, DataArray.bfill, DataArray.ffill, numpy.pad, dask.array.pad Notes ----- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1123262db01..7b4d4e03dd6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5704,13 +5704,13 @@ def pad( pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, mode: str = "constant", stat_length: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, constant_values: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, end_values: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, reflect_type: str = None, **pad_width_kwargs: Any, @@ -5805,7 +5805,7 @@ def pad( See also -------- - Dataset.shift, Dataset.roll, numpy.pad, dask.array.pad + Dataset.shift, Dataset.roll, Dataset.bfill, Dataset.ffill, numpy.pad, dask.array.pad Notes ----- diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 5559404715e..ce27b118180 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -33,12 +33,6 @@ infix_dims, ) -try: - import dask.array as da -except ImportError: - pass - - NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, pd.Index, @@ -1168,13 +1162,13 @@ def pad( pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, mode: str = "constant", stat_length: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, constant_values: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] 
] = None, end_values: Union[ - None, int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] ] = None, reflect_type: str = None, **pad_width_kwargs: Any, @@ -1214,7 +1208,9 @@ def pad( pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") # change default behaviour of pad with mode constant - if mode == "constant" and constant_values is None: + if mode == "constant" and ( + constant_values is None or constant_values is dtypes.NA + ): dtype, constant_values = dtypes.maybe_promote(self.dtype) else: dtype = self.dtype diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 3268d1ec3a0..67b0c647344 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2043,7 +2043,7 @@ def test_pad(self, dtype): ((3, 1), (0, 0), (2, 0)), ] for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(**xr_arg) + actual = v.pad(**xr_arg, mode="constant") expected = xr.Variable( v.dims, np.pad( @@ -2061,7 +2061,7 @@ def test_pad(self, dtype): data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = xr.Variable(["x", "y", "z"], data) for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(fill_value=data.flat[0], **xr_arg) + actual = v.pad(**xr_arg, mode="constant", constant_values=data.flat[0]) expected = xr.Variable( v.dims, np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]), From 6958da9415b1bf24c347a060528536eee4d0175c Mon Sep 17 00:00:00 2001 From: mark-boer Date: Sun, 8 Mar 2020 21:14:02 +0100 Subject: [PATCH 20/22] fix test_unit.test_pad_constant_values --- xarray/tests/test_units.py | 52 +++++++++++++++----------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 67b0c647344..bc784c6adc2 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -9,7 +9,7 @@ from xarray.core import formatting 
from xarray.core.npcompat import IS_NEP18_ACTIVE -from .test_variable import VariableSubclassobjects +from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects pint = pytest.importorskip("pint") DimensionalityError = pint.errors.DimensionalityError @@ -2032,42 +2032,32 @@ def test_no_conflicts(self, unit, dtype): assert expected == actual - def test_pad(self, dtype): + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad_constant_values(self, dtype, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2).astype(dtype) * unit_registry.m v = xr.Variable(["x", "y", "z"], data) - xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] - np_args = [ - ((2, 1), (0, 0), (0, 0)), - ((0, 0), (0, 3), (0, 0)), - ((3, 1), (0, 0), (2, 0)), - ] - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad(**xr_arg, mode="constant") - expected = xr.Variable( - v.dims, - np.pad( - v.data.astype(float), - np_arg, - mode="constant", - constant_values=np.nan, - ), - ) - xr.testing.assert_identical(expected, actual) - assert_units_equal(expected, actual) - assert isinstance(actual._data, type(v._data)) + actual = v.pad(**xr_arg, mode="constant") + expected = xr.Variable( + v.dims, + np.pad( + v.data.astype(float), np_arg, mode="constant", constant_values=np.nan, + ), + ) + xr.testing.assert_identical(expected, actual) + assert_units_equal(expected, actual) + assert isinstance(actual._data, type(v._data)) # for the boolean array, we pad False data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = xr.Variable(["x", "y", "z"], data) - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad(**xr_arg, mode="constant", constant_values=data.flat[0]) - expected = xr.Variable( - v.dims, - np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]), - ) - xr.testing.assert_identical(actual, expected) - assert_units_equal(expected, actual) + actual = v.pad(**xr_arg, mode="constant", constant_values=data.flat[0]) + 
expected = xr.Variable( + v.dims, + np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]), + ) + xr.testing.assert_identical(actual, expected) + assert_units_equal(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2089,7 +2079,7 @@ def test_pad(self, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_pad_with_fill_value(self, unit, error, dtype): + def test_pad_unit_constant_value(self, unit, error, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) From af0a4a1fd34359ee5b523bfd9c00c4f3f021e957 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 18 Mar 2020 15:57:17 -0600 Subject: [PATCH 21/22] Keewis review comments --- xarray/core/duck_array_ops.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d12d4a3d915..ff2d0af63ed 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -11,16 +11,14 @@ import numpy as np import pandas as pd -from . import dask_array_ops, dtypes, npcompat, nputils +from . import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type try: import dask.array as dask_array - from . 
import dask_array_compat except ImportError: dask_array = None # type: ignore - dask_array_compat = None # type: ignore def _dask_or_eager_func( @@ -116,7 +114,7 @@ def notnull(data): isin = _dask_or_eager_func("isin", array_args=slice(2)) take = _dask_or_eager_func("take") broadcast_to = _dask_or_eager_func("broadcast_to") -pad = _dask_or_eager_func("pad") +pad = _dask_or_eager_func("pad", dask_module=dask_array_compat) _concatenate = _dask_or_eager_func("concatenate", list_of_args=True) _stack = _dask_or_eager_func("stack", list_of_args=True) @@ -599,6 +597,3 @@ def rolling_window(array, axis, window, center, fill_value): return dask_array_ops.rolling_window(array, axis, window, center, fill_value) else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) - - -pad = _dask_or_eager_func("pad", dask_module=dask_array_compat) From f781f72ff5e88c49993d0b791f6a93cebb62739c Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 19 Mar 2020 07:18:13 -0600 Subject: [PATCH 22/22] Add experimental warning --- xarray/core/dataarray.py | 4 ++++ xarray/core/dataset.py | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d72ffa5b252..4d9993d7383 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3257,6 +3257,10 @@ def pad( ) -> "DataArray": """Pad this array along one or more dimensions. + .. warning:: + This function is experimental and its behaviour is likely to change + especially regarding padding of dimension coordinates (or IndexVariables). + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), coordinates will be padded with the same mode, otherwise coordinates are padded using the "constant" mode with fill_value dtypes.NA. 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7b4d4e03dd6..937bebc2bc8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5717,6 +5717,10 @@ def pad( ) -> "Dataset": """Pad this dataset along one or more dimensions. + .. warning:: + This function is experimental and its behaviour is likely to change + especially regarding padding of dimension coordinates (or IndexVariables). + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), coordinates will be padded with the same mode, otherwise coordinates are padded using the "constant" mode with fill_value dtypes.NA. @@ -5854,7 +5858,9 @@ def pad( ) else: variables[name] = var.pad( - pad_width=var_pad_width, mode=coord_pad_mode, **coord_pad_options, # type: ignore + pad_width=var_pad_width, + mode=coord_pad_mode, + **coord_pad_options, # type: ignore ) return self._replace_vars_and_dims(variables)