From 46c4931a140fd39991620b483d347bee9ee66afe Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 2 Nov 2019 16:33:33 -0400 Subject: [PATCH 01/15] python 3.8 tests (#3477) * python 3.8 tests * whatsnew * Update doc/whats-new.rst Co-Authored-By: crusaderky * Update doc/whats-new.rst Co-Authored-By: crusaderky --- azure-pipelines.yml | 2 ++ ci/requirements/py38.yml | 15 +++++++++++++++ doc/whats-new.rst | 8 ++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 ci/requirements/py38.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c7f9de73cf4..90de0705a27 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -18,6 +18,8 @@ jobs: conda_env: py36 py37: conda_env: py37 + py38: + conda_env: py38 py37-upstream-dev: conda_env: py37 upstream_dev: true diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml new file mode 100644 index 00000000000..9698e3efecf --- /dev/null +++ b/ci/requirements/py38.yml @@ -0,0 +1,15 @@ +name: xarray-tests +channels: + - conda-forge +dependencies: + - python=3.8 + - pip + - pip: + - coveralls + - dask + - distributed + - numpy + - pandas + - pytest + - pytest-cov + - pytest-env diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 47e2e58e988..c117382f23f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -78,7 +78,8 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) by `Mickaël Lalande `_. +- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) + by `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` and explicitly state that a datetime-like dimension is required. (:pull:`3400`) @@ -104,7 +105,10 @@ Internal Changes ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. 
Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. -- Use Python 3.6 idioms throughout the codebase. (:pull:3419) +- Use Python 3.6 idioms throughout the codebase. (:pull:`3419`) + By `Maximilian Roos `_ + +- Run basic CI tests on Python 3.8. (:pull:`3477`) By `Maximilian Roos `_ From b649846b9ceef0db8631e7148f5ee9415bdd4621 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 5 Nov 2019 02:19:51 +0000 Subject: [PATCH 02/15] Propagate indexes in DataArray binary operations. (#3481) * Propagate indexes in DataArray binary operations. Works by propagating indexes in DataArray._replace. xref #2227. Tests pass! * remove commented code. * fix roll --- xarray/core/dataarray.py | 8 +++++--- xarray/core/dataset.py | 2 ++ xarray/core/groupby.py | 1 + xarray/core/indexes.py | 3 +++ xarray/tests/test_dataarray.py | 11 +++++++++++ xarray/tests/test_dataset.py | 8 ++++++++ 6 files changed, 30 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b61f83bcb1c..35ee90fb5c8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -386,6 +386,7 @@ def _replace( variable: Variable = None, coords=None, name: Union[Hashable, None, Default] = _default, + indexes=None, ) -> "DataArray": if variable is None: variable = self.variable @@ -393,7 +394,7 @@ def _replace( coords = self._coords if name is _default: name = self.name - return type(self)(variable, coords, name=name, fastpath=True) + return type(self)(variable, coords, name=name, fastpath=True, indexes=indexes) def _replace_maybe_drop_dims( self, variable: Variable, name: Union[Hashable, None, Default] = _default @@ -440,7 +441,8 @@ def _from_temp_dataset( ) -> "DataArray": variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables - return self._replace(variable, coords, name) + indexes = dataset._indexes + return self._replace(variable, coords, name, indexes=indexes) def _to_dataset_split(self, dim: Hashable) -> 
Dataset: def subset(dim, label): @@ -2506,7 +2508,7 @@ def func(self, other): coords, indexes = self.coords._merge_raw(other_coords) name = self._result_name(other) - return self._replace(variable, coords, name) + return self._replace(variable, coords, name, indexes=indexes) return func diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2b89051e84e..978242e5f6b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4891,6 +4891,8 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): (dim,) = self.variables[k].dims if dim in shifts: indexes[k] = roll_index(v, shifts[dim]) + else: + indexes[k] = v else: indexes = dict(self.indexes) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 353566eb345..209ac14184b 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -529,6 +529,7 @@ def _maybe_unstack(self, obj): for dim in self._inserted_dims: if dim in obj.coords: del obj.coords[dim] + del obj.indexes[dim] return obj def fillna(self, value): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a96fbccbeee..1574f4f18df 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -35,6 +35,9 @@ def __contains__(self, key): def __getitem__(self, key): return self._indexes[key] + def __delitem__(self, key): + del self._indexes[key] + def __repr__(self): return formatting.indexes_repr(self) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5114d13b0dc..2c823b0c20a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3953,6 +3953,17 @@ def test_matmul(self): expected = da.dot(da) assert_identical(result, expected) + def test_binary_op_propagate_indexes(self): + # regression test for GH2227 + self.dv["x"] = np.arange(self.dv.sizes["x"]) + expected = self.dv.indexes["x"] + + actual = (self.dv * 10).indexes["x"] + assert expected is actual + + actual = (self.dv > 10).indexes["x"] + assert expected is actual + def 
test_binary_op_join_setting(self): dim = "x" align_type = "outer" diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index eab6040e17e..b9fa20fab26 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4951,6 +4951,14 @@ def test_filter_by_attrs(self): ) assert not bool(new_ds.data_vars) + def test_binary_op_propagate_indexes(self): + ds = Dataset( + {"d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]})} + ) + expected = ds.indexes["x"] + actual = (ds * 2).indexes["x"] + assert expected is actual + def test_binary_op_join_setting(self): # arithmetic_join applies to data array coordinates missing_2 = xr.Dataset({"x": [0, 1]}) From af28c6b02fac08494f5d9ae2718d68a084d93949 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 5 Nov 2019 15:41:13 +0000 Subject: [PATCH 03/15] Optimize dask array equality checks. (#3453) * Optimize dask array equality checks. Dask arrays with the same graph have the same name. We can use this to quickly compare dask-backed variables without computing. Fixes #3068 and #3311 * better docstring * review suggestions. * add concat test * update whats new * Add identity check to lazy_array_equiv * pep8 * bugfix. --- doc/whats-new.rst | 3 + xarray/core/concat.py | 56 ++++++++++++------ xarray/core/duck_array_ops.py | 62 ++++++++++++++----- xarray/core/merge.py | 19 ++++-- xarray/core/variable.py | 14 +++-- xarray/tests/test_dask.py | 108 +++++++++++++++++++++++++++++++++- 6 files changed, 217 insertions(+), 45 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c117382f23f..dcaab011e67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -70,6 +70,9 @@ Bug fixes but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_. +- Use dask names to compare dask objects prior to comparing values after computation. + (:issue:`3068`, :issue:`3311`, :issue:`3454`, :pull:`3453`). 
+ By `Deepak Cherian `_. - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 0d19990bdd0..c26153eb0d8 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -2,6 +2,7 @@ from . import dtypes, utils from .alignment import align +from .duck_array_ops import lazy_array_equiv from .merge import _VALID_COMPAT, unique_variable from .variable import IndexVariable, Variable, as_variable from .variable import concat as concat_vars @@ -189,26 +190,43 @@ def process_subset_opt(opt, subset): # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: - # Compare the variable of all datasets vs. the one - # of the first dataset. Perform the minimum amount of - # loads in order to avoid multiple loads from disk - # while keeping the RAM footprint low. - v_lhs = datasets[0].variables[k].load() - # We'll need to know later on if variables are equal. - computed = [] - for ds_rhs in datasets[1:]: - v_rhs = ds_rhs.variables[k].compute() - computed.append(v_rhs) - if not getattr(v_lhs, compat)(v_rhs): - concat_over.add(k) - equals[k] = False - # computed variables are not to be re-computed - # again in the future - for ds, v in zip(datasets[1:], computed): - ds.variables[k].data = v.data + equals[k] = None + variables = [ds.variables[k] for ds in datasets] + # first check without comparing values i.e. no computes + for var in variables[1:]: + equals[k] = getattr(variables[0], compat)( + var, equiv=lazy_array_equiv + ) + if equals[k] is not True: + # exit early if we know these are not equal or that + # equality cannot be determined i.e. one or all of + # the variables wraps a numpy array break - else: - equals[k] = True + + if equals[k] is False: + concat_over.add(k) + + elif equals[k] is None: + # Compare the variable of all datasets vs. 
the one + # of the first dataset. Perform the minimum amount of + # loads in order to avoid multiple loads from disk + # while keeping the RAM footprint low. + v_lhs = datasets[0].variables[k].load() + # We'll need to know later on if variables are equal. + computed = [] + for ds_rhs in datasets[1:]: + v_rhs = ds_rhs.variables[k].compute() + computed.append(v_rhs) + if not getattr(v_lhs, compat)(v_rhs): + concat_over.add(k) + equals[k] = False + # computed variables are not to be re-computed + # again in the future + for ds, v in zip(datasets[1:], computed): + ds.variables[k].data = v.data + break + else: + equals[k] = True elif opt == "all": concat_over.update( diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d943788c434..71e79335c3d 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -174,14 +174,42 @@ def as_shared_dtype(scalars_or_arrays): return [x.astype(out_type, copy=False) for x in arrays] -def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): - """Like np.allclose, but also allows values to be NaN in both arrays +def lazy_array_equiv(arr1, arr2): + """Like array_equal, but doesn't actually compare values. + Returns True when arr1, arr2 identical or their dask names are equal. + Returns False when shapes are not equal. 
+ Returns None when equality cannot be determined: one or both of arr1, arr2 are numpy arrays; + or their dask names are not equal """ + if arr1 is arr2: + return True arr1 = asarray(arr1) arr2 = asarray(arr2) if arr1.shape != arr2.shape: return False - return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) + if ( + dask_array + and isinstance(arr1, dask_array.Array) + and isinstance(arr2, dask_array.Array) + ): + # GH3068 + if arr1.name == arr2.name: + return True + else: + return None + return None + + +def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): + """Like np.allclose, but also allows values to be NaN in both arrays + """ + arr1 = asarray(arr1) + arr2 = asarray(arr2) + lazy_equiv = lazy_array_equiv(arr1, arr2) + if lazy_equiv is None: + return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) + else: + return lazy_equiv def array_equiv(arr1, arr2): @@ -189,12 +217,14 @@ def array_equiv(arr1, arr2): """ arr1 = asarray(arr1) arr2 = asarray(arr2) - if arr1.shape != arr2.shape: - return False - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "In the future, 'NAT == x'") - flag_array = (arr1 == arr2) | (isnull(arr1) & isnull(arr2)) - return bool(flag_array.all()) + lazy_equiv = lazy_array_equiv(arr1, arr2) + if lazy_equiv is None: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "In the future, 'NAT == x'") + flag_array = (arr1 == arr2) | (isnull(arr1) & isnull(arr2)) + return bool(flag_array.all()) + else: + return lazy_equiv def array_notnull_equiv(arr1, arr2): @@ -203,12 +233,14 @@ def array_notnull_equiv(arr1, arr2): """ arr1 = asarray(arr1) arr2 = asarray(arr2) - if arr1.shape != arr2.shape: - return False - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "In the future, 'NAT == x'") - flag_array = (arr1 == arr2) | isnull(arr1) | isnull(arr2) - return bool(flag_array.all()) + lazy_equiv = lazy_array_equiv(arr1, arr2) + if lazy_equiv is None: + with 
warnings.catch_warnings(): + warnings.filterwarnings("ignore", "In the future, 'NAT == x'") + flag_array = (arr1 == arr2) | isnull(arr1) | isnull(arr2) + return bool(flag_array.all()) + else: + return lazy_equiv def count(data, axis=None): diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 389ceb155f7..daf0c3b059f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -19,6 +19,7 @@ from . import dtypes, pdcompat from .alignment import deep_align +from .duck_array_ops import lazy_array_equiv from .utils import Frozen, dict_equiv from .variable import Variable, as_variable, assert_unique_multiindex_level_names @@ -123,16 +124,24 @@ def unique_variable( combine_method = "fillna" if equals is None: - out = out.compute() + # first check without comparing values i.e. no computes for var in variables[1:]: - equals = getattr(out, compat)(var) - if not equals: + equals = getattr(out, compat)(var, equiv=lazy_array_equiv) + if equals is not True: break + if equals is None: + # now compare values with minimum number of computes + out = out.compute() + for var in variables[1:]: + equals = getattr(out, compat)(var) + if not equals: + break + if not equals: raise MergeError( - "conflicting values for variable {!r} on objects to be combined. " - "You can skip this check by specifying compat='override'.".format(name) + f"conflicting values for variable {name!r} on objects to be combined. " + "You can skip this check by specifying compat='override'." 
) if combine_method: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 117ab85ae65..916df75b3e0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1236,7 +1236,9 @@ def transpose(self, *dims) -> "Variable": dims = self.dims[::-1] dims = tuple(infix_dims(dims, self.dims)) axes = self.get_axis_num(dims) - if len(dims) < 2: # no need to transpose if only one dimension + if len(dims) < 2 or dims == self.dims: + # no need to transpose if only one dimension + # or dims are in same order return self.copy(deep=False) data = as_indexable(self._data).transpose(axes) @@ -1595,22 +1597,24 @@ def broadcast_equals(self, other, equiv=duck_array_ops.array_equiv): return False return self.equals(other, equiv=equiv) - def identical(self, other): + def identical(self, other, equiv=duck_array_ops.array_equiv): """Like equals, but also checks attributes. """ try: - return utils.dict_equiv(self.attrs, other.attrs) and self.equals(other) + return utils.dict_equiv(self.attrs, other.attrs) and self.equals( + other, equiv=equiv + ) except (TypeError, AttributeError): return False - def no_conflicts(self, other): + def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """True if the intersection of two Variable's non-null data is equal; otherwise false. Variables can thus still be equal if there are locations where either, or both, contain NaN values. """ - return self.broadcast_equals(other, equiv=duck_array_ops.array_notnull_equiv) + return self.broadcast_equals(other, equiv=equiv) def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index c4323d1d317..34115b29b23 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -24,6 +24,7 @@ raises_regex, requires_scipy_or_netCDF4, ) +from ..core.duck_array_ops import lazy_array_equiv from .test_backends import create_tmp_file dask = pytest.importorskip("dask") @@ -428,7 +429,53 @@ def test_concat_loads_variables(self): out.compute() assert kernel_call_count == 24 - # Finally, test that riginals are unaltered + # Finally, test that originals are unaltered + assert ds1["d"].data is d1 + assert ds1["c"].data is c1 + assert ds2["d"].data is d2 + assert ds2["c"].data is c2 + assert ds3["d"].data is d3 + assert ds3["c"].data is c3 + + # now check that concat() is correctly using dask name equality to skip loads + out = xr.concat( + [ds1, ds1, ds1], dim="n", data_vars="different", coords="different" + ) + assert kernel_call_count == 24 + # variables are not loaded in the output + assert isinstance(out["d"].data, dask.array.Array) + assert isinstance(out["c"].data, dask.array.Array) + + out = xr.concat( + [ds1, ds1, ds1], dim="n", data_vars=[], coords=[], compat="identical" + ) + assert kernel_call_count == 24 + # variables are not loaded in the output + assert isinstance(out["d"].data, dask.array.Array) + assert isinstance(out["c"].data, dask.array.Array) + + out = xr.concat( + [ds1, ds2.compute(), ds3], + dim="n", + data_vars="all", + coords="different", + compat="identical", + ) + # c1,c3 must be computed for comparison since c2 is numpy; + # d2 is computed too + assert kernel_call_count == 28 + + out = xr.concat( + [ds1, ds2.compute(), ds3], + dim="n", + data_vars="all", + coords="all", + compat="identical", + ) + # no extra computes + assert kernel_call_count == 30 + + # Finally, test that originals are unaltered assert ds1["d"].data is d1 assert ds1["c"].data is c1 assert ds2["d"].data is d2 @@ -1142,6 +1189,19 @@ def test_make_meta(map_ds): assert 
meta.data_vars[variable].shape == (0,) * meta.data_vars[variable].ndim +def test_identical_coords_no_computes(): + lons2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + a = xr.DataArray( + da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} + ) + b = xr.DataArray( + da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} + ) + with raise_if_dask_computes(): + c = a + b + assert_identical(c, a) + + @pytest.mark.parametrize( "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] ) @@ -1229,3 +1289,49 @@ def test_normalize_token_with_backend(map_ds): map_ds.to_netcdf(tmp_file) read = xr.open_dataset(tmp_file) assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) + + +@pytest.mark.parametrize( + "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] +) +def test_lazy_array_equiv_variables(compat): + var1 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) + var2 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) + var3 = xr.Variable(("y", "x"), da.zeros((20, 10), chunks=2)) + + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2, equiv=lazy_array_equiv) + # values are actually equal, but we don't know that till we compute, return None + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2 / 2, equiv=lazy_array_equiv) is None + + # shapes are not equal, return False without computes + with raise_if_dask_computes(): + assert getattr(var1, compat)(var3, equiv=lazy_array_equiv) is False + + # if one or both arrays are numpy, return None + assert getattr(var1, compat)(var2.compute(), equiv=lazy_array_equiv) is None + assert ( + getattr(var1.compute(), compat)(var2.compute(), equiv=lazy_array_equiv) is None + ) + + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2.transpose("y", "x")) + + +@pytest.mark.parametrize( + "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] +) +def test_lazy_array_equiv_merge(compat): 
+ da1 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + da2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + da3 = xr.DataArray(da.ones((20, 10), chunks=2), dims=("y", "x")) + + with raise_if_dask_computes(): + xr.merge([da1, da2], compat=compat) + # shapes are not equal; no computes necessary + with raise_if_dask_computes(max_computes=0): + with pytest.raises(ValueError): + xr.merge([da1, da3], compat=compat) + with raise_if_dask_computes(max_computes=2): + xr.merge([da1, da2 / 2], compat=compat) From 4dce93f134e8296ea730104b46ce3372b90304ac Mon Sep 17 00:00:00 2001 From: barronh Date: Tue, 5 Nov 2019 10:42:34 -0500 Subject: [PATCH 04/15] uamiv test using only raw uamiv variables (#3485) * uamiv test using only raw uamiv variables Previous test relied on CF generated metadata, but this test is more robust. * uamiv test using only raw uamiv variables Previous test relied on CF generated metadata, but this test is more robust. * uamiv test using only raw uamiv variables Previous test relied on CF generated metadata, but this test is more robust. 
* uamiv test using only raw uamiv variables --- xarray/tests/test_backends.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4bdebe73050..9b000b82b03 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3400,20 +3400,17 @@ def test_uamiv_format_read(self): actual = camxfile.variables["O3"] assert_allclose(expected, actual) - data = np.array(["2002-06-03"], "datetime64[ns]") + data = np.array([[[2002154, 0]]], dtype="i") expected = xr.Variable( - ("TSTEP",), + ("TSTEP", "VAR", "DATE-TIME"), data, dict( - bounds="time_bounds", - long_name=( - "synthesized time coordinate " - + "from SDATE, STIME, STEP " - + "global attributes" - ), + long_name="TFLAG".ljust(16), + var_desc="TFLAG".ljust(80), + units="DATE-TIME".ljust(16), ), ) - actual = camxfile.variables["time"] + actual = camxfile.variables["TFLAG"] assert_allclose(expected, actual) camxfile.close() @@ -3439,18 +3436,15 @@ def test_uamiv_format_mfread(self): actual = camxfile.variables["O3"] assert_allclose(expected, actual) - data1 = np.array(["2002-06-03"], "datetime64[ns]") - data = np.concatenate([data1] * 2, axis=0) + data = np.array([[[2002154, 0]]], dtype="i").repeat(2, 0) attrs = dict( - bounds="time_bounds", - long_name=( - "synthesized time coordinate " - + "from SDATE, STIME, STEP " - + "global attributes" - ), + long_name="TFLAG".ljust(16), + var_desc="TFLAG".ljust(80), + units="DATE-TIME".ljust(16), ) - expected = xr.Variable(("TSTEP",), data, attrs) - actual = camxfile.variables["time"] + dims = ("TSTEP", "VAR", "DATE-TIME") + expected = xr.Variable(dims, data, attrs) + actual = camxfile.variables["TFLAG"] assert_allclose(expected, actual) camxfile.close() From 0e8debfe28286b5fe1f3d27e8dcc8466a62aca6d Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 7 Nov 2019 15:13:50 -0500 Subject: [PATCH 05/15] 
drop_vars; deprecate drop for variables (#3475) * Deprecate drop for vars, in favor of drop_vars * docs tweaks * handle scalars as vars * allow warning in old whatsnew * add drop_sel, adjust deprecations based on comments * whatsnew * docs * old-whatsnew * docstring * pendingdeprecationwarning * whatsnew * whatsnew * move units tests to drop_sel * is_scalar (but retain isinstance for mypy) --- doc/data-structures.rst | 4 +- doc/indexing.rst | 6 +- doc/whats-new.rst | 7 ++ xarray/core/concat.py | 2 +- xarray/core/dataarray.py | 78 ++++++++---- xarray/core/dataset.py | 180 +++++++++++++++------------- xarray/core/groupby.py | 2 +- xarray/core/merge.py | 2 +- xarray/core/resample.py | 6 +- xarray/tests/test_backends.py | 8 +- xarray/tests/test_dask.py | 10 +- xarray/tests/test_dataarray.py | 45 +++---- xarray/tests/test_dataset.py | 115 +++++++++++------- xarray/tests/test_duck_array_ops.py | 3 +- xarray/tests/test_interp.py | 2 +- xarray/tests/test_plot.py | 6 +- xarray/tests/test_units.py | 6 +- 17 files changed, 286 insertions(+), 196 deletions(-) diff --git a/doc/data-structures.rst b/doc/data-structures.rst index d5567f4863e..93cdc7e9765 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -393,14 +393,14 @@ methods (like pandas) for transforming datasets into new objects. For removing variables, you can select and drop an explicit list of variables by indexing with a list of names or using the -:py:meth:`~xarray.Dataset.drop` methods to return a new ``Dataset``. These +:py:meth:`~xarray.Dataset.drop_vars` methods to return a new ``Dataset``. These operations keep around coordinates: .. ipython:: python ds[['temperature']] ds[['temperature', 'temperature_double']] - ds.drop('temperature') + ds.drop_vars('temperature') To remove a dimension, you can use :py:meth:`~xarray.Dataset.drop_dims` method. 
Any variables using that dimension are dropped: diff --git a/doc/indexing.rst b/doc/indexing.rst index 9ee8f1dddf8..ace960689a8 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -232,14 +232,14 @@ Using indexing to *assign* values to a subset of dataset (e.g., Dropping labels and dimensions ------------------------------ -The :py:meth:`~xarray.Dataset.drop` method returns a new object with the listed +The :py:meth:`~xarray.Dataset.drop_sel` method returns a new object with the listed index labels along a dimension dropped: .. ipython:: python - ds.drop(space=['IN', 'IL']) + ds.drop_sel(space=['IN', 'IL']) -``drop`` is both a ``Dataset`` and ``DataArray`` method. +``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. Use :py:meth:`~xarray.Dataset.drop_dims` to drop a full dimension from a Dataset. Any variables with these dimensions are also dropped: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dcaab011e67..0906058469d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,12 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. + :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for + dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible + option for dropping either labels or variables, but using the more specific methods is encouraged. + (:pull:`3475`) + By `Maximilian Roos `_ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) to represent all 'other' dimensions. For example, to move one dimension to the front, use `.transpose('x', ...)`. (:pull:`3421`) @@ -3752,6 +3758,7 @@ Enhancements explicitly listed variables or index labels: .. 
ipython:: python + :okwarning: # drop variables ds = xray.Dataset({'x': 0, 'y': 1}) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index c26153eb0d8..5b4fc078236 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -388,7 +388,7 @@ def ensure_common_dims(vars): result = result.set_coords(coord_names) result.encoding = result_encoding - result = result.drop(unlabeled_dims, errors="ignore") + result = result.drop_vars(unlabeled_dims, errors="ignore") if coord is not None: # add concat dimension last to ensure that its in the final Dataset diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 35ee90fb5c8..d2d37871ee9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -16,7 +16,6 @@ TypeVar, Union, cast, - overload, ) import numpy as np @@ -53,7 +52,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes from .options import OPTIONS -from .utils import Default, ReprObject, _default, _check_inplace, either_dict_or_kwargs +from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( IndexVariable, Variable, @@ -249,7 +248,7 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ - _accessors: Optional[Dict[str, Any]] + _accessors: Optional[Dict[str, Any]] # noqa _coords: Dict[Any, Variable] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] @@ -1890,41 +1889,72 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra def T(self) -> "DataArray": return self.transpose() - # Drop coords - @overload - def drop( - self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" + def drop_vars( + self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" ) -> "DataArray": - ... + """Drop variables from this DataArray. + + Parameters + ---------- + names : hashable or iterable of hashables + Name(s) of variables to drop. 
+ errors: {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError error if any of the variables + passed are not in the dataset. If 'ignore', any given names that are in the + DataArray are dropped and no error is raised. + + Returns + ------- + dropped : DataArray + + """ + ds = self._to_temp_dataset().drop_vars(names, errors=errors) + return self._from_temp_dataset(ds) - # Drop index labels along dimension - @overload # noqa: F811 def drop( - self, labels: Any, dim: Hashable, *, errors: str = "raise" # array-like + self, + labels: Mapping = None, + dim: Hashable = None, + *, + errors: str = "raise", + **labels_kwargs, ) -> "DataArray": - ... + """Backward compatible method based on `drop_vars` and `drop_sel` - def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811 - """Drop coordinates or index labels from this DataArray. + Using either `drop_vars` or `drop_sel` is encouraged + """ + ds = self._to_temp_dataset().drop(labels, dim, errors=errors) + return self._from_temp_dataset(ds) + + def drop_sel( + self, + labels: Mapping[Hashable, Any] = None, + *, + errors: str = "raise", + **labels_kwargs, + ) -> "DataArray": + """Drop index labels from this DataArray. Parameters ---------- - labels : hashable or sequence of hashables - Name(s) of coordinates or index labels to drop. - If dim is not None, labels can be any array-like. - dim : hashable, optional - Dimension along which to drop index labels. By default (if - ``dim is None``), drops coordinates rather than index labels. + labels : Mapping[Hashable, Any] + Index labels to drop errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if - any of the coordinates or index labels passed are not - in the array. If 'ignore', any given labels that are in the - array are dropped and no error is raised. + any of the index labels passed are not + in the dataset. If 'ignore', any given labels that are in the + dataset are dropped and no error is raised. 
+ **labels_kwargs : {dim: label, ...}, optional + The keyword arguments form of ``dim`` and ``labels`` + Returns ------- dropped : DataArray """ - ds = self._to_temp_dataset().drop(labels, dim, errors=errors) + if labels_kwargs or isinstance(labels, dict): + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") + + ds = self._to_temp_dataset().drop_sel(labels, errors=errors) return self._from_temp_dataset(ds) def dropna( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 978242e5f6b..2cadc90334c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -25,7 +25,6 @@ TypeVar, Union, cast, - overload, ) import numpy as np @@ -80,6 +79,7 @@ hashable, is_dict_like, is_list_like, + is_scalar, maybe_wrap_array, ) from .variable import IndexVariable, Variable, as_variable, broadcast_variables @@ -3519,39 +3519,98 @@ def _assert_all_in_dataset( "cannot be found in this dataset" ) - # Drop variables - @overload # noqa: F811 - def drop( - self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" + def drop_vars( + self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" ) -> "Dataset": - ... + """Drop variables from this dataset. - # Drop index labels along dimension - @overload # noqa: F811 - def drop( - self, labels: Any, dim: Hashable, *, errors: str = "raise" # array-like - ) -> "Dataset": - ... + Parameters + ---------- + names : hashable or iterable of hashables + Name(s) of variables to drop. + errors: {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError error if any of the variable + passed are not in the dataset. If 'ignore', any given names that are in the + dataset are dropped and no error is raised. - def drop( # noqa: F811 - self, labels=None, dim=None, *, errors="raise", **labels_kwargs - ): - """Drop variables or index labels from this dataset. 
+ Returns + ------- + dropped : Dataset + + """ + # the Iterable check is required for mypy + if is_scalar(names) or not isinstance(names, Iterable): + names = {names} + else: + names = set(names) + if errors == "raise": + self._assert_all_in_dataset(names) + + variables = {k: v for k, v in self._variables.items() if k not in names} + coord_names = {k for k in self._coord_names if k in variables} + indexes = {k: v for k, v in self.indexes.items() if k not in names} + return self._replace_with_new_dims( + variables, coord_names=coord_names, indexes=indexes + ) + + def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): + """Backward compatible method based on `drop_vars` and `drop_sel` + + Using either `drop_vars` or `drop_sel` is encouraged + """ + if errors not in ["raise", "ignore"]: + raise ValueError('errors must be either "raise" or "ignore"') + + if is_dict_like(labels) and not isinstance(labels, dict): + warnings.warn( + "dropping coordinates using `drop` is be deprecated; use drop_vars.", + FutureWarning, + stacklevel=2, + ) + return self.drop_vars(labels, errors=errors) + + if labels_kwargs or isinstance(labels, dict): + if dim is not None: + raise ValueError("cannot specify dim and dict-like arguments.") + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") + + if dim is None and (is_list_like(labels) or is_scalar(labels)): + warnings.warn( + "dropping variables using `drop` will be deprecated; using drop_vars is encouraged.", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.drop_vars(labels, errors=errors) + if dim is not None: + warnings.warn( + "dropping labels using list-like labels is deprecated; using " + "dict-like arguments with `drop_sel`, e.g. 
`ds.drop_sel(dim=[labels]).", + DeprecationWarning, + stacklevel=2, + ) + return self.drop_sel({dim: labels}, errors=errors, **labels_kwargs) + + warnings.warn( + "dropping labels using `drop` will be deprecated; using drop_sel is encouraged.", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.drop_sel(labels, errors=errors) + + def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): + """Drop index labels from this dataset. Parameters ---------- - labels : hashable or iterable of hashables - Name(s) of variables or index labels to drop. - dim : None or hashable, optional - Dimension along which to drop index labels. By default (if - ``dim is None``), drops variables rather than index labels. + labels : Mapping[Hashable, Any] + Index labels to drop errors: {'raise', 'ignore'}, optional If 'raise' (default), raises a ValueError error if - any of the variable or index labels passed are not + any of the index labels passed are not in the dataset. If 'ignore', any given labels that are in the dataset are dropped and no error is raised. **labels_kwargs : {dim: label, ...}, optional - The keyword arguments form of ``dim`` and ``labels``. 
+ The keyword arguments form of ``dim`` and ``labels` Returns ------- @@ -3562,7 +3621,7 @@ def drop( # noqa: F811 >>> data = np.random.randn(2, 3) >>> labels = ['a', 'b', 'c'] >>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels}) - >>> ds.drop(y=['a', 'c']) + >>> ds.drop_sel(y=['a', 'c']) Dimensions: (x: 2, y: 1) Coordinates: @@ -3570,7 +3629,7 @@ def drop( # noqa: F811 Dimensions without coordinates: x Data variables: A (x, y) float64 -0.3454 0.1734 - >>> ds.drop(y='b') + >>> ds.drop_sel(y='b') Dimensions: (x: 2, y: 2) Coordinates: @@ -3582,61 +3641,22 @@ def drop( # noqa: F811 if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - if is_dict_like(labels) and not isinstance(labels, dict): - warnings.warn( - "dropping coordinates using key values of dict-like labels is " - "deprecated; use drop_vars or a list of coordinates.", - FutureWarning, - stacklevel=2, - ) - if dim is not None and is_list_like(labels): - warnings.warn( - "dropping dimensions using list-like labels is deprecated; use " - "dict-like arguments.", - DeprecationWarning, - stacklevel=2, - ) + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") - if labels_kwargs or isinstance(labels, dict): - labels_kwargs = either_dict_or_kwargs(labels, labels_kwargs, "drop") - if dim is not None: - raise ValueError("cannot specify dim and dict-like arguments.") - ds = self - for dim, labels in labels_kwargs.items(): - ds = ds._drop_labels(labels, dim, errors=errors) - return ds - elif dim is None: - if isinstance(labels, str) or not isinstance(labels, Iterable): - labels = {labels} - else: - labels = set(labels) - return self._drop_vars(labels, errors=errors) - else: - return self._drop_labels(labels, dim, errors=errors) - - def _drop_labels(self, labels=None, dim=None, errors="raise"): - # Don't cast to set, as it would harm performance when labels - # is a large numpy array - if utils.is_scalar(labels): - labels = [labels] - labels = 
np.asarray(labels) - try: - index = self.indexes[dim] - except KeyError: - raise ValueError("dimension %r does not have coordinate labels" % dim) - new_index = index.drop(labels, errors=errors) - return self.loc[{dim: new_index}] - - def _drop_vars(self, names: set, errors: str = "raise") -> "Dataset": - if errors == "raise": - self._assert_all_in_dataset(names) - - variables = {k: v for k, v in self._variables.items() if k not in names} - coord_names = {k for k in self._coord_names if k in variables} - indexes = {k: v for k, v in self.indexes.items() if k not in names} - return self._replace_with_new_dims( - variables, coord_names=coord_names, indexes=indexes - ) + ds = self + for dim, labels_for_dim in labels.items(): + # Don't cast to set, as it would harm performance when labels + # is a large numpy array + if utils.is_scalar(labels_for_dim): + labels_for_dim = [labels_for_dim] + labels_for_dim = np.asarray(labels_for_dim) + try: + index = self.indexes[dim] + except KeyError: + raise ValueError("dimension %r does not have coordinate labels" % dim) + new_index = index.drop(labels_for_dim, errors=errors) + ds = ds.loc[{dim: new_index}] + return ds def drop_dims( self, drop_dims: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" @@ -3679,7 +3699,7 @@ def drop_dims( ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} - return self._drop_vars(drop_vars) + return self.drop_vars(drop_vars) def transpose(self, *dims: Hashable) -> "Dataset": """Return a new Dataset object with all array dimensions transposed. 
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 209ac14184b..c8906e34737 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -775,7 +775,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): ) if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop("quantile") + out = out.drop_vars("quantile") return out def reduce( diff --git a/xarray/core/merge.py b/xarray/core/merge.py index daf0c3b059f..10c7804d718 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -859,6 +859,6 @@ def dataset_update_method( if c not in value.dims and c in dataset.coords ] if coord_names: - other[key] = value.drop(coord_names) + other[key] = value.drop_vars(coord_names) return merge_core([dataset, other], priority_arg=1, indexes=dataset.indexes) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 998964273be..2cb1bd55e19 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -47,7 +47,7 @@ def _upsample(self, method, *args, **kwargs): if k == self._dim: continue if self._dim in v.dims: - self._obj = self._obj.drop(k) + self._obj = self._obj.drop_vars(k) if method == "asfreq": return self.mean(self._dim) @@ -146,7 +146,7 @@ def _interpolate(self, kind="linear"): dummy = self._obj.copy() for k, v in self._obj.coords.items(): if k != self._dim and self._dim in v.dims: - dummy = dummy.drop(k) + dummy = dummy.drop_vars(k) return dummy.interp( assume_sorted=True, method=kind, @@ -218,7 +218,7 @@ def apply(self, func, shortcut=False, args=(), **kwargs): # dimension, then we need to do so before we can rename the proxy # dimension we used. 
if self._dim in combined.coords: - combined = combined.drop(self._dim) + combined = combined.drop_vars(self._dim) if self._resample_dim in combined.dims: combined = combined.rename({self._resample_dim: self._dim}) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 9b000b82b03..de3a7eadab0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -800,7 +800,7 @@ def equals_latlon(obj): assert "coordinates" not in ds["lat"].attrs assert "coordinates" not in ds["lon"].attrs - modified = original.drop(["temp", "precip"]) + modified = original.drop_vars(["temp", "precip"]) with self.roundtrip(modified) as actual: assert_identical(actual, modified) with create_tmp_file() as tmp_file: @@ -2177,7 +2177,7 @@ def test_cross_engine_read_write_netcdf4(self): # Drop dim3, because its labels include strings. These appear to be # not properly read with python-netCDF4, which converts them into # unicode instead of leaving them as bytes. - data = create_test_data().drop("dim3") + data = create_test_data().drop_vars("dim3") data.attrs["foo"] = "bar" valid_engines = ["netcdf4", "h5netcdf"] for write_engine in valid_engines: @@ -2344,7 +2344,7 @@ def test_open_twice(self): def test_open_fileobj(self): # open in-memory datasets instead of local file paths - expected = create_test_data().drop("dim3") + expected = create_test_data().drop_vars("dim3") expected.attrs["foo"] = "bar" with create_tmp_file() as tmp_file: expected.to_netcdf(tmp_file, engine="h5netcdf") @@ -4190,7 +4190,7 @@ def test_open_dataarray_options(self): with create_tmp_file() as tmp: data.to_netcdf(tmp) - expected = data.drop("y") + expected = data.drop_vars("y") with open_dataarray(tmp, drop_variables=["y"]) as loaded: assert_identical(expected, loaded) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 34115b29b23..fa8ae9991d7 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1129,11 +1129,11 @@ def 
test_map_blocks_to_array(map_ds): [ lambda x: x, lambda x: x.to_dataset(), - lambda x: x.drop("x"), + lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), - # TODO: [lambda x: x.isel(x=1).drop("x"), map_da], + # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], ], ) def test_map_blocks_da_transformations(func, map_da): @@ -1147,9 +1147,9 @@ def test_map_blocks_da_transformations(func, map_da): "func", [ lambda x: x, - lambda x: x.drop("cxy"), - lambda x: x.drop("a"), - lambda x: x.drop("x"), + lambda x: x.drop_vars("cxy"), + lambda x: x.drop_vars("a"), + lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.rename({"a": "new1", "b": "new2"}), # TODO: [lambda x: x.isel(x=1)], diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2c823b0c20a..acfe684d220 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -906,7 +906,7 @@ def test_sel_dataarray(self): assert_array_equal(actual, da.isel(x=[0, 1, 2])) assert "new_dim" in actual.dims assert "new_dim" in actual.coords - assert_equal(actual["new_dim"].drop("x"), ind["new_dim"]) + assert_equal(actual["new_dim"].drop_vars("x"), ind["new_dim"]) def test_sel_invalid_slice(self): array = DataArray(np.arange(10), [("x", np.arange(10))]) @@ -1660,7 +1660,7 @@ def test_expand_dims_with_greater_dim_size(self): coords=expected_coords, dims=list(expected_coords.keys()), attrs={"key": "entry"}, - ).drop(["y", "dim_0"]) + ).drop_vars(["y", "dim_0"]) assert_identical(expected, actual) # Test with kwargs instead of passing dict to dim arg. 
@@ -1677,7 +1677,7 @@ def test_expand_dims_with_greater_dim_size(self): }, dims=["dim_1", "x", "dim_0"], attrs={"key": "entry"}, - ).drop("dim_0") + ).drop_vars("dim_0") assert_identical(other_way_expected, other_way) def test_set_index(self): @@ -1993,7 +1993,7 @@ def test_stack_unstack(self): ) pd.util.testing.assert_index_equal(a, b) - actual = orig.stack(z=["x", "y"]).unstack("z").drop(["x", "y"]) + actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) assert_identical(orig, actual) dims = ["a", "b", "c", "d", "e"] @@ -2001,11 +2001,11 @@ def test_stack_unstack(self): stacked = orig.stack(ab=["a", "b"], cd=["c", "d"]) unstacked = stacked.unstack(["ab", "cd"]) - roundtripped = unstacked.drop(["a", "b", "c", "d"]).transpose(*dims) + roundtripped = unstacked.drop_vars(["a", "b", "c", "d"]).transpose(*dims) assert_identical(orig, roundtripped) unstacked = stacked.unstack() - roundtripped = unstacked.drop(["a", "b", "c", "d"]).transpose(*dims) + roundtripped = unstacked.drop_vars(["a", "b", "c", "d"]).transpose(*dims) assert_identical(orig, roundtripped) def test_stack_unstack_decreasing_coordinate(self): @@ -2109,40 +2109,43 @@ def test_drop_coordinates(self): expected = DataArray(np.random.randn(2, 3), dims=["x", "y"]) arr = expected.copy() arr.coords["z"] = 2 - actual = arr.drop("z") + actual = arr.drop_vars("z") assert_identical(expected, actual) with pytest.raises(ValueError): - arr.drop("not found") + arr.drop_vars("not found") - actual = expected.drop("not found", errors="ignore") + actual = expected.drop_vars("not found", errors="ignore") assert_identical(actual, expected) with raises_regex(ValueError, "cannot be found"): - arr.drop("w") + arr.drop_vars("w") - actual = expected.drop("w", errors="ignore") + actual = expected.drop_vars("w", errors="ignore") assert_identical(actual, expected) renamed = arr.rename("foo") with raises_regex(ValueError, "cannot be found"): - renamed.drop("foo") + renamed.drop_vars("foo") - actual = 
renamed.drop("foo", errors="ignore") + actual = renamed.drop_vars("foo", errors="ignore") assert_identical(actual, renamed) def test_drop_index_labels(self): arr = DataArray(np.random.randn(2, 3), coords={"y": [0, 1, 2]}, dims=["x", "y"]) - actual = arr.drop([0, 1], dim="y") + actual = arr.drop_sel(y=[0, 1]) expected = arr[:, 2:] assert_identical(actual, expected) with raises_regex((KeyError, ValueError), "not .* in axis"): - actual = arr.drop([0, 1, 3], dim="y") + actual = arr.drop_sel(y=[0, 1, 3]) - actual = arr.drop([0, 1, 3], dim="y", errors="ignore") + actual = arr.drop_sel(y=[0, 1, 3], errors="ignore") assert_identical(actual, expected) + with pytest.warns(DeprecationWarning): + arr.drop([0, 1, 3], dim="y", errors="ignore") + def test_dropna(self): x = np.random.randn(4, 4) x[::2, 0] = np.nan @@ -3360,7 +3363,7 @@ def test_to_pandas(self): da = DataArray(np.random.randn(*shape), dims=dims) with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - roundtripped = DataArray(da.to_pandas()).drop(dims) + roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) with raises_regex(ValueError, "cannot convert"): @@ -3411,11 +3414,13 @@ def test_to_and_from_series(self): assert_array_equal(expected.index.values, actual.index.values) assert "foo" == actual.name # test roundtrip - assert_identical(self.dv, DataArray.from_series(actual).drop(["x", "y"])) + assert_identical(self.dv, DataArray.from_series(actual).drop_vars(["x", "y"])) # test name is None actual.name = None expected_da = self.dv.rename(None) - assert_identical(expected_da, DataArray.from_series(actual).drop(["x", "y"])) + assert_identical( + expected_da, DataArray.from_series(actual).drop_vars(["x", "y"]) + ) @requires_sparse def test_from_series_sparse(self): @@ -3478,7 +3483,7 @@ def test_to_and_from_dict(self): # and the most bare bones representation still roundtrips d = {"name": "foo", "dims": ("x", "y"), "data": array.values} - 
assert_identical(array.drop("x"), DataArray.from_dict(d)) + assert_identical(array.drop_vars("x"), DataArray.from_dict(d)) # missing a dims in the coords d = { diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b9fa20fab26..50e78c9f685 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -322,7 +322,7 @@ def __repr__(self): def test_info(self): ds = create_test_data(seed=123) - ds = ds.drop("dim3") # string type prints differently in PY2 vs PY3 + ds = ds.drop_vars("dim3") # string type prints differently in PY2 vs PY3 ds.attrs["unicode_attr"] = "ba®" ds.attrs["string_attr"] = "bar" @@ -509,7 +509,9 @@ def test_constructor_compat(self): {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]}, ) - actual = Dataset({"a": original["a"][:, 0], "b": original["a"][0].drop("x")}) + actual = Dataset( + {"a": original["a"][:, 0], "b": original["a"][0].drop_vars("x")} + ) assert_identical(expected, actual) data = {"x": DataArray(0, coords={"y": 3}), "y": ("z", [1, 1, 1])} @@ -775,9 +777,9 @@ def test_coords_set(self): one_coord.reset_coords("x") actual = all_coords.reset_coords("zzz", drop=True) - expected = all_coords.drop("zzz") + expected = all_coords.drop_vars("zzz") assert_identical(expected, actual) - expected = two_coords.drop("zzz") + expected = two_coords.drop_vars("zzz") assert_identical(expected, actual) def test_coords_to_dataset(self): @@ -954,7 +956,7 @@ def test_dask_is_lazy(self): ds.fillna(0) ds.rename({"dim1": "foobar"}) ds.set_coords("var1") - ds.drop("var1") + ds.drop_vars("var1") def test_isel(self): data = create_test_data() @@ -1097,7 +1099,7 @@ def test_isel_fancy(self): actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"]) assert "station" in actual.coords assert "station" in actual.dims - assert_identical(actual["station"].drop(["dim2"]), stations["station"]) + assert_identical(actual["station"].drop_vars(["dim2"]), stations["station"]) with raises_regex(ValueError, "conflicting values 
for "): data.isel( @@ -1123,7 +1125,7 @@ def test_isel_fancy(self): assert "dim2" in actual.coords assert "a" in actual["dim2"].dims - assert_identical(actual["a"].drop(["dim2"]), stations["a"]) + assert_identical(actual["a"].drop_vars(["dim2"]), stations["a"]) assert_identical(actual["b"], stations["b"]) expected_var1 = data["var1"].variable[ stations["dim1s"].variable, stations["dim2s"].variable @@ -1132,7 +1134,7 @@ def test_isel_fancy(self): stations["dim1s"].variable, stations["dim2s"].variable ] expected_var3 = data["var3"].variable[slice(None), stations["dim1s"].variable] - assert_equal(actual["a"].drop("dim2"), stations["a"]) + assert_equal(actual["a"].drop_vars("dim2"), stations["a"]) assert_array_equal(actual["var1"], expected_var1) assert_array_equal(actual["var2"], expected_var2) assert_array_equal(actual["var3"], expected_var3) @@ -1200,7 +1202,7 @@ def test_isel_dataarray(self): indexing_da = indexing_da < 3 actual = data.isel(dim2=indexing_da) assert_identical( - actual["dim2"].drop("non_dim").drop("non_dim2"), data["dim2"][:2] + actual["dim2"].drop_vars("non_dim").drop_vars("non_dim2"), data["dim2"][:2] ) assert_identical(actual["non_dim"], indexing_da["non_dim"][:2]) assert_identical(actual["non_dim2"], indexing_da["non_dim2"]) @@ -1286,8 +1288,10 @@ def test_sel_dataarray(self): expected = data.isel(dim2=[0, 1, 2]).rename({"dim2": "new_dim"}) assert "new_dim" in actual.dims assert "new_dim" in actual.coords - assert_equal(actual.drop("new_dim").drop("dim2"), expected.drop("new_dim")) - assert_equal(actual["new_dim"].drop("dim2"), ind["new_dim"]) + assert_equal( + actual.drop_vars("new_dim").drop_vars("dim2"), expected.drop_vars("new_dim") + ) + assert_equal(actual["new_dim"].drop_vars("dim2"), ind["new_dim"]) # with conflicted coordinate (silently ignored) ind = DataArray( @@ -1304,10 +1308,12 @@ def test_sel_dataarray(self): coords={"new_dim": ["a", "b", "c"], "dim2": 3}, ) actual = data.sel(dim2=ind) - 
assert_equal(actual["new_dim"].drop("dim2"), ind["new_dim"].drop("dim2")) + assert_equal( + actual["new_dim"].drop_vars("dim2"), ind["new_dim"].drop_vars("dim2") + ) expected = data.isel(dim2=[0, 1, 2]) expected["dim2"] = (("new_dim"), expected["dim2"].values) - assert_equal(actual["dim2"].drop("new_dim"), expected["dim2"]) + assert_equal(actual["dim2"].drop_vars("new_dim"), expected["dim2"]) assert actual["var1"].dims == ("dim1", "new_dim") # with non-dimensional coordinate @@ -1322,7 +1328,7 @@ def test_sel_dataarray(self): ) actual = data.sel(dim2=ind) expected = data.isel(dim2=[0, 1, 2]) - assert_equal(actual.drop("new_dim"), expected) + assert_equal(actual.drop_vars("new_dim"), expected) assert np.allclose(actual["new_dim"].values, ind["new_dim"].values) def test_sel_dataarray_mindex(self): @@ -1554,8 +1560,8 @@ def test_sel_fancy(self): expected_ary = data["foo"][[0, 1, 2], [0, 2, 1]] actual = data.sel(x=idx_x, y=idx_y) assert_array_equal(expected_ary, actual["foo"]) - assert_identical(actual["a"].drop("x"), idx_x["a"]) - assert_identical(actual["b"].drop("y"), idx_y["b"]) + assert_identical(actual["a"].drop_vars("x"), idx_x["a"]) + assert_identical(actual["b"].drop_vars("y"), idx_y["b"]) with pytest.raises(KeyError): data.sel(x=[2.5], y=[2.0], method="pad", tolerance=1e-3) @@ -2094,36 +2100,50 @@ def test_variable_indexing(self): def test_drop_variables(self): data = create_test_data() - assert_identical(data, data.drop([])) + assert_identical(data, data.drop_vars([])) expected = Dataset({k: data[k] for k in data.variables if k != "time"}) - actual = data.drop("time") + actual = data.drop_vars("time") assert_identical(expected, actual) - actual = data.drop(["time"]) + actual = data.drop_vars(["time"]) assert_identical(expected, actual) with raises_regex(ValueError, "cannot be found"): - data.drop("not_found_here") + data.drop_vars("not_found_here") + + actual = data.drop_vars("not_found_here", errors="ignore") + assert_identical(data, actual) + + actual = 
data.drop_vars(["not_found_here"], errors="ignore") + assert_identical(data, actual) + + actual = data.drop_vars(["time", "not_found_here"], errors="ignore") + assert_identical(expected, actual) + + # deprecated approach with `drop` works (straight copy paste from above) - actual = data.drop("not_found_here", errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop("not_found_here", errors="ignore") assert_identical(data, actual) - actual = data.drop(["not_found_here"], errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop(["not_found_here"], errors="ignore") assert_identical(data, actual) - actual = data.drop(["time", "not_found_here"], errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop(["time", "not_found_here"], errors="ignore") assert_identical(expected, actual) def test_drop_index_labels(self): data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) with pytest.warns(DeprecationWarning): - actual = data.drop(["a"], "x") + actual = data.drop(["a"], dim="x") expected = data.isel(x=[1]) assert_identical(expected, actual) with pytest.warns(DeprecationWarning): - actual = data.drop(["a", "b"], "x") + actual = data.drop(["a", "b"], dim="x") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) @@ -2147,30 +2167,30 @@ def test_drop_index_labels(self): # DataArrays as labels are a nasty corner case as they are not # Iterable[Hashable] - DataArray.__iter__ yields scalar DataArrays. 
- actual = data.drop(DataArray(["a", "b", "c"]), "x", errors="ignore") + actual = data.drop_sel(x=DataArray(["a", "b", "c"]), errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) + with pytest.warns(DeprecationWarning): + data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore") + assert_identical(expected, actual) with raises_regex(ValueError, "does not have coordinate labels"): - data.drop(1, "y") + data.drop_sel(y=1) def test_drop_labels_by_keyword(self): - # Tests for #2910: Support for a additional `drop()` API. data = Dataset( {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} ) # Basic functionality. assert len(data.coords["x"]) == 2 - # In the future, this will break. with pytest.warns(DeprecationWarning): ds1 = data.drop(["a"], dim="x") - ds2 = data.drop(x="a") - ds3 = data.drop(x=["a"]) - ds4 = data.drop(x=["a", "b"]) - ds5 = data.drop(x=["a", "b"], y=range(0, 6, 2)) + ds2 = data.drop_sel(x="a") + ds3 = data.drop_sel(x=["a"]) + ds4 = data.drop_sel(x=["a", "b"]) + ds5 = data.drop_sel(x=["a", "b"], y=range(0, 6, 2)) - # In the future, this will result in different behavior. arr = DataArray(range(3), dims=["c"]) with pytest.warns(FutureWarning): data.drop(arr.coords) @@ -2187,10 +2207,11 @@ def test_drop_labels_by_keyword(self): # Error handling if user tries both approaches. 
with pytest.raises(ValueError): data.drop(labels=["a"], x="a") - with pytest.raises(ValueError): - data.drop(dim="x", x="a") with pytest.raises(ValueError): data.drop(labels=["a"], dim="x", x="a") + warnings.filterwarnings("ignore", r"\W*drop") + with pytest.raises(ValueError): + data.drop(dim="x", x="a") def test_drop_dims(self): data = xr.Dataset( @@ -2203,15 +2224,15 @@ def test_drop_dims(self): ) actual = data.drop_dims("x") - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) actual = data.drop_dims("y") - expected = data.drop("A") + expected = data.drop_vars("A") assert_identical(expected, actual) actual = data.drop_dims(["x", "y"]) - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) with pytest.raises((ValueError, KeyError)): @@ -2230,7 +2251,7 @@ def test_drop_dims(self): actual = data.drop_dims("z", errors="wrong_value") actual = data.drop_dims(["x", "y", "z"], errors="ignore") - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) def test_copy(self): @@ -2571,7 +2592,7 @@ def test_expand_dims_mixed_int_and_coords(self): original["x"].values * np.ones([4, 3, 3]), coords=dict(d=range(4), e=["l", "m", "n"], a=np.linspace(0, 1, 3)), dims=["d", "e", "a"], - ).drop("d"), + ).drop_vars("d"), "y": xr.DataArray( original["y"].values * np.ones([4, 3, 4, 3]), coords=dict( @@ -2581,7 +2602,7 @@ def test_expand_dims_mixed_int_and_coords(self): a=np.linspace(0, 1, 3), ), dims=["d", "e", "b", "a"], - ).drop("d"), + ).drop_vars("d"), }, coords={"c": np.linspace(0, 1, 5)}, ) @@ -3059,7 +3080,7 @@ def test_setitem_with_coords(self): np.arange(10), dims="dim3", coords={"numbers": ("dim3", np.arange(10))} ) expected = ds.copy() - expected["var3"] = other.drop("numbers") + expected["var3"] = other.drop_vars("numbers") actual = ds.copy() actual["var3"] = other 
assert_identical(expected, actual) @@ -4504,7 +4525,9 @@ def test_apply(self): actual = data.apply(lambda x: x.mean(keep_attrs=True), keep_attrs=True) assert_identical(expected, actual) - assert_identical(data.apply(lambda x: x, keep_attrs=True), data.drop("time")) + assert_identical( + data.apply(lambda x: x, keep_attrs=True), data.drop_vars("time") + ) def scale(x, multiple=1): return multiple * x @@ -4514,7 +4537,7 @@ def scale(x, multiple=1): assert_identical(actual["numbers"], data["numbers"]) actual = data.apply(np.asarray) - expected = data.drop("time") # time is not used on a data var + expected = data.drop_vars("time") # time is not used on a data var assert_equal(expected, actual) def make_example_math_dataset(self): @@ -4616,7 +4639,7 @@ def test_dataset_math_auto_align(self): assert_identical(expected, actual) actual = ds.isel(y=slice(1)) + ds.isel(y=slice(1, None)) - expected = 2 * ds.drop(ds.y, dim="y") + expected = 2 * ds.drop_sel(y=ds.y) assert_equal(actual, expected) actual = ds + ds[["bar"]] diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 9df2f167cf2..f678af2fec5 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -441,7 +441,8 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): ) expected = getattr(da, func)(dim=aggdim, skipna=skipna) assert_allclose( - actual.drop(list(actual.coords)), expected.drop(list(expected.coords)) + actual.drop_vars(list(actual.coords)), + expected.drop_vars(list(expected.coords)), ) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index b9dc9a71acc..b93325d7eab 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -553,7 +553,7 @@ def test_datetime_single_string(): actual = da.interp(time="2000-01-01T12:00") expected = xr.DataArray(0.5) - assert_allclose(actual.drop("time"), expected) + assert_allclose(actual.drop_vars("time"), expected) @requires_cftime diff 
--git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 7deabd46eae..6e283ea01da 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1837,7 +1837,11 @@ def test_default_labels(self): assert substring_in_axes(self.darray.name, ax) def test_test_empty_cell(self): - g = self.darray.isel(row=1).drop("row").plot(col="col", hue="hue", col_wrap=2) + g = ( + self.darray.isel(row=1) + .drop_vars("row") + .plot(col="col", hue="hue", col_wrap=2) + ) bottomright = g.axes[-1, -1] assert not bottomright.has_data() assert not bottomright.get_visible() diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 9d14104bb50..80063f8b4bc 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1093,7 +1093,7 @@ def test_content_manipulation(self, func, dtype): "func", ( pytest.param( - method("drop", labels=np.array([1, 5]), dim="x"), + method("drop_sel", labels=dict(x=np.array([1, 5]))), marks=pytest.mark.xfail( reason="selecting using incompatible units does not raise" ), @@ -1128,9 +1128,9 @@ def test_content_manipulation_with_units(self, func, unit, error, dtype): expected = attach_units( func(strip_units(data_array), **stripped_kwargs), - {"data": quantity.units if func.name == "drop" else unit, "x": x.units}, + {"data": quantity.units if func.name == "drop_sel" else unit, "x": x.units}, ) - if error is not None and func.name == "drop": + if error is not None and func.name == "drop_sel": with pytest.raises(error): func(data_array, **kwargs) else: From bb89534687ee5dac54d87c22154d3cfeb030ce21 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 7 Nov 2019 19:01:08 -0500 Subject: [PATCH 06/15] whatsnew corrections (#3494) --- doc/whats-new.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0906058469d..04fe88e9993 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,7 +41,7 @@ New 
Features - :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible - option for dropping either lables or variables, but using the more specific methods is encouraged. + option for dropping either labels or variables, but using the more specific methods is encouraged. (:pull:`3475`) By `Maximilian Roos `_ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) @@ -120,6 +120,8 @@ Internal Changes - Run basic CI tests on Python 3.8. (:pull:`3477`) By `Maximilian Roos `_ +- Enable type checking on default sentinel values (:pull:`3472`) + By `Maximilian Roos `_ .. _whats-new.0.14.0: From 3bb0414f1f45890607bfe178f64577c5936d0432 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 8 Nov 2019 13:14:09 +0100 Subject: [PATCH 07/15] unpin pseudonetcdf (#3496) --- ci/requirements/py36.yml | 2 +- ci/requirements/py37-windows.yml | 2 +- ci/requirements/py37.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index f9847ef6da5..10fe69253e8 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -29,7 +29,7 @@ dependencies: - pandas - pint - pip - - pseudonetcdf<3.1 # FIXME https://github.com/pydata/xarray/issues/3409 + - pseudonetcdf - pydap - pynio - pytest diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index 111cd96c30c..614a3bb1fab 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -29,7 +29,7 @@ dependencies: - pandas - pint - pip - - pseudonetcdf<3.1 # FIXME https://github.com/pydata/xarray/issues/3409 + - pseudonetcdf - pydap # - pynio # Not available on Windows - pytest diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index 
d816019dd65..827c664a222 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -29,7 +29,7 @@ dependencies: - pandas - pint - pip - - pseudonetcdf<3.1 # FIXME https://github.com/pydata/xarray/issues/3409 + - pseudonetcdf - pydap - pynio - pytest From 37e5ae7df590e33e1c1dfbfcded318dc50dddf26 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 8 Nov 2019 15:33:06 +0000 Subject: [PATCH 08/15] fix pandas-dev tests (#3491) * Don't initialize DataArrays with Pandas attrs. * re-enable pandas-dev CI. --- ci/azure/install.yml | 2 +- xarray/core/dataarray.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/azure/install.yml b/ci/azure/install.yml index 8da0ac1b5de..fee886ba804 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -16,7 +16,7 @@ steps: --pre \ --upgrade \ matplotlib \ - pandas=0.26.0.dev0+628.g03c1a3db2 \ # FIXME https://github.com/pydata/xarray/issues/3440 + pandas \ scipy # numpy \ # FIXME https://github.com/pydata/xarray/issues/3409 pip install \ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d2d37871ee9..3e4c7903180 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -51,6 +51,7 @@ from .dataset import Dataset, merge_indexes, split_indexes from .formatting import format_item from .indexes import Indexes, default_indexes +from .merge import PANDAS_TYPES from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( @@ -357,7 +358,7 @@ def __init__( dims = getattr(data, "dims", getattr(coords, "dims", None)) if name is None: name = getattr(data, "name", None) - if attrs is None: + if attrs is None and not isinstance(data, PANDAS_TYPES): attrs = getattr(data, "attrs", None) if encoding is None: encoding = getattr(data, "encoding", None) From ffc32755ca98d11208b6403d424ebcc2ba5bc4fa Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 9 Nov 2019 05:21:26 +0100 Subject: [PATCH 09/15] 
tests for datasets with units (#3447) * start writing the tests for dataset * add tests for initializing Datasets * add tests for aggregation methods / functions * add tests for the ndarray methods / properties * add tests for missing value handling methods * add tests for comparison methods * add tests for reordering / stacking the test for to_stacked_array seems a bit brittle * add tests for indexing methods * remove the commented out xfail on Dataset.squeeze * add tests for head, tail and thin * add tests for the computation methods * add tests for grouped operations * add tests for the content manipulation methods * fix reindex_like to actually expect errors where appropriate * use DataArray.copy to replicate a DataArray with different data * add tests for repr / str * remove the comment about moving the merge tests * construct a new data array instead of using `copy`, which in combination with `assign_coords` makes preserving `MultiIndex` instances much more complicated. * update whats-new.rst --- doc/whats-new.rst | 2 +- xarray/tests/test_units.py | 1744 +++++++++++++++++++++++++++++++++++- 2 files changed, 1740 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 04fe88e9993..d2a4b32a71f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -104,7 +104,7 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Added integration tests against `pint `_. - (:pull:`3238`) by `Justus Magin `_. + (:pull:`3238`, :pull:`3447`) by `Justus Magin `_. .. 
note:: diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 80063f8b4bc..8eed1f0dbe3 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -123,14 +123,19 @@ def extract_units(obj): def strip_units(obj): if isinstance(obj, xr.Dataset): - data_vars = {name: strip_units(value) for name, value in obj.data_vars.items()} - coords = {name: strip_units(value) for name, value in obj.coords.items()} + data_vars = { + strip_units(name): strip_units(value) + for name, value in obj.data_vars.items() + } + coords = { + strip_units(name): strip_units(value) for name, value in obj.coords.items() + } new_obj = xr.Dataset(data_vars=data_vars, coords=coords) elif isinstance(obj, xr.DataArray): data = array_strip_units(obj.data) coords = { - name: ( + strip_units(name): ( (value.dims, array_strip_units(value.data)) if isinstance(value.data, Quantity) else value # to preserve multiindexes @@ -138,9 +143,13 @@ def strip_units(obj): for name, value in obj.coords.items() } - new_obj = xr.DataArray(name=obj.name, data=data, coords=coords, dims=obj.dims) - elif hasattr(obj, "magnitude"): + new_obj = xr.DataArray( + name=strip_units(obj.name), data=data, coords=coords, dims=obj.dims + ) + elif isinstance(obj, unit_registry.Quantity): new_obj = obj.magnitude + elif isinstance(obj, (list, tuple)): + return type(obj)(strip_units(elem) for elem in obj) else: new_obj = obj @@ -191,6 +200,38 @@ def attach_units(obj, units): return new_obj +def convert_units(obj, to): + if isinstance(obj, xr.Dataset): + data_vars = { + name: convert_units(array, to) for name, array in obj.data_vars.items() + } + coords = {name: convert_units(array, to) for name, array in obj.coords.items()} + + new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs) + elif isinstance(obj, xr.DataArray): + name = obj.name + + new_units = ( + to.get(name, None) or to.get("data", None) or to.get(None, None) or 1 + ) + data = convert_units(obj.data, {None: new_units}) + + 
coords = { + name: (array.dims, convert_units(array.data, to)) + for name, array in obj.coords.items() + if name != obj.name + } + + new_obj = xr.DataArray(name=name, data=data, coords=coords, attrs=obj.attrs) + elif isinstance(obj, unit_registry.Quantity): + units = to.get(None) + new_obj = obj.to(units) if units is not None else obj + else: + new_obj = obj + + return new_obj + + def assert_equal_with_units(a, b): # works like xr.testing.assert_equal, but also explicitly checks units # so, it is more like assert_identical @@ -1632,3 +1673,1696 @@ def test_grouped_operations(self, func, dtype): result = func(data_array.groupby("y")) assert_equal_with_units(expected, result) + + +class TestDataset: + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="same_unit"), + ), + ) + @pytest.mark.parametrize( + "shared", + ( + "nothing", + pytest.param( + "dims", + marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), + ), + pytest.param( + "coords", + marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), + ), + ), + ) + def test_init(self, shared, unit, error, dtype): + original_unit = unit_registry.m + scaled_unit = unit_registry.mm + + a = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa + b = np.linspace(-1, 0, 12).astype(dtype) * unit_registry.Pa + + raw_x = np.arange(a.shape[0]) + x = raw_x * original_unit + x2 = x.to(scaled_unit) + + raw_y = np.arange(b.shape[0]) + y = raw_y * unit + y_units = unit if isinstance(y, unit_registry.Quantity) else None + if isinstance(y, unit_registry.Quantity): + if y.check(scaled_unit): + y2 = y.to(scaled_unit) + else: + y2 = y * 1000 + y2_units = y2.units + else: + y2 = y * 1000 + 
y2_units = None + + variants = { + "nothing": ({"x": x, "x2": ("x", x2)}, {"y": y, "y2": ("y", y2)}), + "dims": ( + {"x": x, "x2": ("x", strip_units(x2))}, + {"x": y, "y2": ("x", strip_units(y2))}, + ), + "coords": ({"x": raw_x, "y": ("x", x2)}, {"x": raw_y, "y": ("x", y2)}), + } + coords_a, coords_b = variants.get(shared) + + dims_a, dims_b = ("x", "y") if shared == "nothing" else ("x", "x") + + arr1 = xr.DataArray(data=a, coords=coords_a, dims=dims_a) + arr2 = xr.DataArray(data=b, coords=coords_b, dims=dims_b) + if error is not None and shared != "nothing": + with pytest.raises(error): + xr.Dataset(data_vars={"a": arr1, "b": arr2}) + + return + + result = xr.Dataset(data_vars={"a": arr1, "b": arr2}) + + expected_units = { + "a": a.units, + "b": b.units, + "x": x.units, + "x2": x2.units, + "y": y_units, + "y2": y2_units, + } + expected = attach_units( + xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}), + expected_units, + ) + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize( + "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) + ) + @pytest.mark.parametrize( + "variant", + ( + pytest.param( + "with_dims", + marks=pytest.mark.xfail(reason="units in indexes are not supported"), + ), + pytest.param("with_coords"), + pytest.param("without_coords"), + ), + ) + @pytest.mark.filterwarnings("error:::pint[.*]") + def test_repr(self, func, variant, dtype): + array1 = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.Pa + array2 = np.linspace(0, 1, 10, dtype=dtype) * unit_registry.degK + + x = np.arange(len(array1)) * unit_registry.s + y = x.to(unit_registry.ms) + + variants = { + "with_dims": {"x": x}, + "with_coords": {"y": ("x", y)}, + "without_coords": {}, + } + + data_array = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("x", array2)}, + coords=variants.get(variant), + ) + + # FIXME: this just checks that the repr does not raise + # warnings or errors, but does not check the result + func(data_array) + 
+ @pytest.mark.parametrize( + "func", + ( + pytest.param( + function("all"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + pytest.param( + function("any"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + function("argmax"), + function("argmin"), + function("max"), + function("min"), + function("mean"), + pytest.param( + function("median"), + marks=pytest.mark.xfail( + reason="np.median does not work with dataset yet" + ), + ), + pytest.param( + function("sum"), + marks=pytest.mark.xfail( + reason="np.result_type not implemented by pint" + ), + ), + pytest.param( + function("prod"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + function("std"), + function("var"), + function("cumsum"), + pytest.param( + function("cumprod"), + marks=pytest.mark.xfail( + reason="pint does not support cumprod on non-dimensionless yet" + ), + ), + pytest.param( + method("all"), marks=pytest.mark.xfail(reason="not implemented by pint") + ), + pytest.param( + method("any"), marks=pytest.mark.xfail(reason="not implemented by pint") + ), + method("argmax"), + method("argmin"), + method("max"), + method("min"), + method("mean"), + method("median"), + pytest.param( + method("sum"), + marks=pytest.mark.xfail( + reason="np.result_type not implemented by pint" + ), + ), + pytest.param( + method("prod"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + method("std"), + method("var"), + method("cumsum"), + pytest.param( + method("cumprod"), + marks=pytest.mark.xfail( + reason="pint does not support cumprod on non-dimensionless yet" + ), + ), + ), + ids=repr, + ) + def test_aggregation(self, func, dtype): + unit_a = unit_registry.Pa + unit_b = unit_registry.kg / unit_registry.m ** 3 + a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x") + b = xr.DataArray(data=np.linspace(-1, 0, 10).astype(dtype) * unit_b, dims="x") + x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, 
dims="x") + y = xr.DataArray( + data=np.arange(10, 20).astype(dtype) * unit_registry.s, dims="x" + ) + + ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y}) + + result = func(ds) + expected = attach_units( + func(strip_units(ds)), + {"a": array_extract_units(func(a)), "b": array_extract_units(func(b))}, + ) + + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize("property", ("imag", "real")) + def test_numpy_properties(self, property, dtype): + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray( + data=np.linspace(0, 1, 10) * unit_registry.Pa, dims="x" + ), + "b": xr.DataArray( + data=np.linspace(-1, 0, 15) * unit_registry.Pa, dims="y" + ), + }, + coords={ + "x": np.arange(10) * unit_registry.m, + "y": np.arange(15) * unit_registry.s, + }, + ) + units = extract_units(ds) + + result = getattr(ds, property) + expected = attach_units(getattr(strip_units(ds), property), units) + + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize( + "func", + ( + method("astype", float), + method("conj"), + method("argsort"), + method("conjugate"), + method("round"), + pytest.param( + method("rank", dim="x"), + marks=pytest.mark.xfail(reason="pint does not implement rank yet"), + ), + ), + ids=repr, + ) + def test_numpy_methods(self, func, dtype): + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray( + data=np.linspace(1, -1, 10) * unit_registry.Pa, dims="x" + ), + "b": xr.DataArray( + data=np.linspace(-1, 1, 15) * unit_registry.Pa, dims="y" + ), + }, + coords={ + "x": np.arange(10) * unit_registry.m, + "y": np.arange(15) * unit_registry.s, + }, + ) + units = { + "a": array_extract_units(func(ds.a)), + "b": array_extract_units(func(ds.b)), + "x": unit_registry.m, + "y": unit_registry.s, + } + + result = func(ds) + expected = attach_units(func(strip_units(ds)), units) + + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr) + @pytest.mark.parametrize( + 
"unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_numpy_methods_with_args(self, func, unit, error, dtype): + data_unit = unit_registry.m + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=np.arange(10) * data_unit, dims="x"), + "b": xr.DataArray(data=np.arange(15) * data_unit, dims="y"), + }, + coords={ + "x": np.arange(10) * unit_registry.m, + "y": np.arange(15) * unit_registry.s, + }, + ) + units = extract_units(ds) + + def strip(value): + return ( + value.magnitude if isinstance(value, unit_registry.Quantity) else value + ) + + def convert(value, to): + if isinstance(value, unit_registry.Quantity) and value.check(to): + return value.to(to) + + return value + + scalar_types = (int, float) + kwargs = { + key: (value * unit if isinstance(value, scalar_types) else value) + for key, value in func.kwargs.items() + } + + stripped_kwargs = { + key: strip(convert(value, data_unit)) for key, value in kwargs.items() + } + + if error is not None: + with pytest.raises(error): + func(ds, **kwargs) + + return + + result = func(ds, **kwargs) + expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) + + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize( + "func", (method("isnull"), method("notnull"), method("count")), ids=repr + ) + def test_missing_value_detection(self, func, dtype): + array1 = ( + np.array( + [ + [1.4, 2.3, np.nan, 7.2], + [np.nan, 9.7, np.nan, np.nan], + [2.1, np.nan, np.nan, 4.6], + [9.9, np.nan, 7.2, 9.1], + ] + ) + * unit_registry.degK + ) + array2 = ( + np.array( + [ + [np.nan, 5.7, 12.0, 7.2], + [np.nan, 12.4, np.nan, 4.2], + [9.8, np.nan, 4.6, 1.4], + [7.2, np.nan, 6.3, np.nan], + 
[8.4, 3.9, np.nan, np.nan], + ] + ) + * unit_registry.Pa + ) + + x = np.arange(array1.shape[0]) * unit_registry.m + y = np.arange(array1.shape[1]) * unit_registry.m + z = np.arange(array2.shape[0]) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("z", "x")), + }, + coords={"x": x, "y": y, "z": z}, + ) + + expected = func(strip_units(ds)) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="ffill and bfill lose the unit") + @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) + def test_missing_value_filling(self, func, dtype): + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.degK + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) + + x = np.arange(len(array1)) + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + expected = attach_units( + func(strip_units(ds), dim="x"), + {"a": unit_registry.degK, "b": unit_registry.Pa}, + ) + result = func(ds, dim="x") + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="fillna drops the unit") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param( + 1, + DimensionalityError, + id="no_unit", + marks=pytest.mark.xfail(reason="blocked by the failing `where`"), + ), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "fill_value", + ( + pytest.param( + -1, + id="python scalar", + marks=pytest.mark.xfail( + reason="python scalar cannot be converted using astype()" + 
), + ), + pytest.param(np.array(-1), id="numpy scalar"), + pytest.param(np.array([-1]), id="numpy array"), + ), + ) + def test_fillna(self, fill_value, unit, error, dtype): + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.m + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.m + ) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + } + ) + + if error is not None: + with pytest.raises(error): + ds.fillna(value=fill_value * unit) + + return + + result = ds.fillna(value=fill_value * unit) + expected = attach_units( + strip_units(ds).fillna(value=fill_value), + {"a": unit_registry.m, "b": unit_registry.m}, + ) + + assert_equal_with_units(expected, result) + + def test_dropna(self, dtype): + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.degK + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + expected = attach_units( + strip_units(ds).dropna(dim="x"), + {"a": unit_registry.degK, "b": unit_registry.Pa}, + ) + result = ds.dropna(dim="x") + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="pint does not implement `numpy.isin`") + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param(unit_registry.cm, id="compatible_unit"), + pytest.param(unit_registry.m, id="same_unit"), + ), + ) + def test_isin(self, unit, dtype): + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.m + ) + array2 = ( + np.array([4.3, 9.8, 7.5, 
np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.m + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype) + values = raw_values * unit + + if ( + isinstance(values, unit_registry.Quantity) + and values.check(unit_registry.m) + and unit != unit_registry.m + ): + raw_values = values.to(unit_registry.m).magnitude + + expected = strip_units(ds).isin(raw_values) + if not isinstance(values, unit_registry.Quantity) or not values.check( + unit_registry.m + ): + expected.a[:] = False + expected.b[:] = False + result = ds.isin(values) + + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize( + "variant", + ( + pytest.param( + "masking", + marks=pytest.mark.xfail( + reason="np.result_type not implemented by quantity" + ), + ), + pytest.param( + "replacing_scalar", + marks=pytest.mark.xfail( + reason="python scalar not convertible using astype" + ), + ), + pytest.param( + "replacing_array", + marks=pytest.mark.xfail( + reason="replacing using an array drops the units" + ), + ), + pytest.param( + "dropping", + marks=pytest.mark.xfail(reason="nan not compatible with quantity"), + ), + ), + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="same_unit"), + ), + ) + def test_where(self, variant, unit, error, dtype): + def _strip_units(mapping): + return {key: array_strip_units(value) for key, value in mapping.items()} + + original_unit = unit_registry.m + array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit + array2 = np.linspace(-1, 0, 
10).astype(dtype) * original_unit + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": np.arange(len(array1))}, + ) + + condition = ds < 0.5 * original_unit + other = np.linspace(-2, -1, 10).astype(dtype) * unit + variant_kwargs = { + "masking": {"cond": condition}, + "replacing_scalar": {"cond": condition, "other": -1 * unit}, + "replacing_array": {"cond": condition, "other": other}, + "dropping": {"cond": condition, "drop": True}, + } + kwargs = variant_kwargs.get(variant) + kwargs_without_units = _strip_units(kwargs) + + if variant not in ("masking", "dropping") and error is not None: + with pytest.raises(error): + ds.where(**kwargs) + + return + + expected = attach_units( + strip_units(ds).where(**kwargs_without_units), + {"a": original_unit, "b": original_unit}, + ) + result = ds.where(**kwargs) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="interpolate strips units") + def test_interpolate_na(self, dtype): + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.degK + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + expected = attach_units( + strip_units(ds).interpolate_na(dim="x"), + {"a": unit_registry.degK, "b": unit_registry.Pa}, + ) + result = ds.interpolate_na(dim="x") + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="uses Dataset.where, which currently fails") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + 
pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="same_unit"), + ), + ) + def test_combine_first(self, unit, error, dtype): + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.degK + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + other_array1 = np.ones_like(array1) * unit + other_array2 = -1 * np.ones_like(array2) * unit + other = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=other_array1, dims="x"), + "b": xr.DataArray(data=other_array2, dims="x"), + }, + coords={"x": np.arange(array1.shape[0])}, + ) + + if error is not None: + with pytest.raises(error): + ds.combine_first(other) + + return + + expected = attach_units( + strip_units(ds).combine_first(strip_units(other)), + {"a": unit_registry.m, "b": unit_registry.m}, + ) + result = ds.combine_first(other) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param( + unit_registry.cm, + id="compatible_unit", + marks=pytest.mark.xfail(reason="identical does not check units yet"), + ), + pytest.param(unit_registry.m, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "variation", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="units in indexes not supported") + ), + "coords", + ), + ) + @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) + def test_comparisons(self, func, variation, unit, dtype): + array1 = np.linspace(0, 5, 10).astype(dtype) + array2 = np.linspace(-5, 0, 10).astype(dtype) + + coord = 
np.arange(len(array1)).astype(dtype) + + original_unit = unit_registry.m + quantity1 = array1 * original_unit + quantity2 = array2 * original_unit + x = coord * original_unit + y = coord * original_unit + + units = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = units.get(variation) + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=quantity1, dims="x"), + "b": xr.DataArray(data=quantity2, dims="x"), + }, + coords={"x": x, "y": ("x", y)}, + ) + + other = attach_units( + strip_units(ds), + { + "a": (data_unit, original_unit if quantity1.check(data_unit) else None), + "b": (data_unit, original_unit if quantity2.check(data_unit) else None), + "x": (dim_unit, original_unit if x.check(dim_unit) else None), + "y": (coord_unit, original_unit if y.check(coord_unit) else None), + }, + ) + + # TODO: test dim coord once indexes leave units intact + # also, express this in terms of calls on the raw data array + # and then check the units + equal_arrays = ( + np.all(ds.a.data == other.a.data) + and np.all(ds.b.data == other.b.data) + and (np.all(x == other.x.data) or True) # dims can't be checked yet + and np.all(y == other.y.data) + ) + equal_units = ( + data_unit == original_unit + and coord_unit == original_unit + and dim_unit == original_unit + ) + expected = equal_arrays and (func.name != "identical" or equal_units) + result = func(ds, other) + + assert expected == result + + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param(unit_registry.cm, id="compatible_unit"), + pytest.param(unit_registry.m, id="identical_unit"), + ), + ) + def test_broadcast_equals(self, unit, dtype): + left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m + left_array2 = 
np.zeros(shape=(2, 6), dtype=dtype) * unit_registry.m + + right_array1 = array_attach_units( + np.ones(shape=(2,), dtype=dtype), + unit, + convert_from=unit_registry.m if left_array1.check(unit) else None, + ) + right_array2 = array_attach_units( + np.ones(shape=(2,), dtype=dtype), + unit, + convert_from=unit_registry.m if left_array2.check(unit) else None, + ) + + left = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=left_array1, dims=("x", "y")), + "b": xr.DataArray(data=left_array2, dims=("x", "z")), + } + ) + right = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=right_array1, dims="x"), + "b": xr.DataArray(data=right_array2, dims="x"), + } + ) + + expected = np.all(left_array1 == right_array1[:, None]) and np.all( + left_array2 == right_array2[:, None] + ) + result = left.broadcast_equals(right) + + assert expected == result + + @pytest.mark.parametrize( + "func", + (method("unstack"), method("reset_index", "v"), method("reorder_levels")), + ids=repr, + ) + def test_stacking_stacked(self, func, dtype): + array1 = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + ) + array2 = ( + np.linspace(-10, 0, 5 * 10 * 15).reshape(5, 10, 15).astype(dtype) + * unit_registry.m + ) + + x = np.arange(array1.shape[0]) + y = np.arange(array1.shape[1]) + z = np.arange(array2.shape[2]) + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + }, + coords={"x": x, "y": y, "z": z}, + ) + + stacked = ds.stack(v=("x", "y")) + + expected = attach_units( + func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m} + ) + result = func(stacked) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="tries to subscript scalar quantities") + def test_to_stacked_array(self, dtype): + labels = np.arange(5).astype(dtype) * unit_registry.s + arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels} + + ds = xr.Dataset( 
+ data_vars={ + name: xr.DataArray(data=array, dims="x") + for name, array in arrays.items() + } + ) + + func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"]) + + result = func(ds).rename(None) + expected = attach_units( + func(strip_units(ds)).rename(None), + {None: unit_registry.m, "y": unit_registry.s}, + ) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + method("transpose", "y", "x", "z1", "z2"), + method("stack", a=("x", "y")), + method("set_index", x="x2"), + pytest.param( + method("shift", x=2), marks=pytest.mark.xfail(reason="sets all to nan") + ), + pytest.param( + method("roll", x=2, roll_coords=False), + marks=pytest.mark.xfail(reason="strips units"), + ), + method("sortby", "x2"), + ), + ids=repr, + ) + def test_stacking_reordering(self, func, dtype): + array1 = ( + np.linspace(0, 10, 2 * 5 * 10).reshape(2, 5, 10).astype(dtype) + * unit_registry.Pa + ) + array2 = ( + np.linspace(0, 10, 2 * 5 * 15).reshape(2, 5, 15).astype(dtype) + * unit_registry.degK + ) + + x = np.arange(array1.shape[0]) + y = np.arange(array1.shape[1]) + z1 = np.arange(array1.shape[2]) + z2 = np.arange(array2.shape[2]) + + x2 = np.linspace(0, 1, array1.shape[0])[::-1] + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y", "z1")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z2")), + }, + coords={"x": x, "y": y, "z1": z1, "z2": z2, "x2": ("x", x2)}, + ) + + expected = attach_units( + func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK} + ) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="indexes strip units") + @pytest.mark.parametrize( + "indices", + ( + pytest.param(4, id="single index"), + pytest.param([5, 2, 9, 1], id="multiple indices"), + ), + ) + def test_isel(self, indices, dtype): + array1 = np.arange(10).astype(dtype) * unit_registry.s + array2 = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa + + x = 
np.arange(len(array1)) * unit_registry.m + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + expected = attach_units( + strip_units(ds).isel(x=indices), + {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m}, + ) + result = ds.isel(x=indices) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( + reason="xarray does not support duck arrays in dimension coordinates" + ) + @pytest.mark.parametrize( + "values", + ( + pytest.param(12, id="single_value"), + pytest.param([10, 5, 13], id="list_of_values"), + pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), + ), + ) + @pytest.mark.parametrize( + "units,error", + ( + pytest.param(1, KeyError, id="no_units"), + pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.ms, KeyError, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="same_unit"), + ), + ) + def test_sel(self, values, units, error, dtype): + array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK + array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa + x = np.arange(len(array1)) * unit_registry.s + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + values_with_units = values * units + + if error is not None: + with pytest.raises(error): + ds.sel(x=values_with_units) + + return + + expected = attach_units( + strip_units(ds).sel(x=values), + {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s}, + ) + result = ds.sel(x=values_with_units) + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( + reason="xarray does not support duck arrays in dimension coordinates" + ) + @pytest.mark.parametrize( + "values", + ( + pytest.param(12, id="single 
value"), + pytest.param([10, 5, 13], id="list of multiple values"), + pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"), + ), + ) + @pytest.mark.parametrize( + "units,error", + ( + pytest.param(1, KeyError, id="no_units"), + pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.ms, KeyError, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="same_unit"), + ), + ) + def test_loc(self, values, units, error, dtype): + array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK + array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa + x = np.arange(len(array1)) * unit_registry.s + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + values_with_units = values * units + + if error is not None: + with pytest.raises(error): + ds.loc[{"x": values_with_units}] + + return + + expected = attach_units( + strip_units(ds).loc[{"x": values}], + {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s}, + ) + result = ds.loc[{"x": values_with_units}] + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( + reason="indexes strip units and head / tail / thin only support integers" + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + ( + method("head", x=7, y=3, z=6), + method("tail", x=7, y=3, z=6), + method("thin", x=7, y=3, z=6), + ), + ids=repr, + ) + def test_head_tail_thin(self, func, unit, error, 
dtype): + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.m, + } + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + kwargs = {name: value * unit for name, value in func.kwargs.items()} + + if error is not None: + with pytest.raises(error): + func(ds, **kwargs) + + return + + expected = attach_units(func(strip_units(ds)), extract_units(ds)) + result = func(ds, **kwargs) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "shape", + ( + pytest.param((10, 20), id="nothing squeezable"), + pytest.param((10, 20, 1), id="last dimension squeezable"), + pytest.param((10, 1, 20), id="middle dimension squeezable"), + pytest.param((1, 10, 20), id="first dimension squeezable"), + pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"), + ), + ) + def test_squeeze(self, shape, dtype): + names = "xyzt" + coords = { + name: np.arange(length).astype(dtype) + * (unit_registry.m if name != "t" else unit_registry.s) + for name, length in zip(names, shape) + } + array1 = ( + np.linspace(0, 1, 10 * 20).astype(dtype).reshape(shape) * unit_registry.degK + ) + array2 = ( + np.linspace(1, 2, 10 * 20).astype(dtype).reshape(shape) * unit_registry.Pa + ) + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=tuple(names[: len(shape)])), + "b": xr.DataArray(data=array2, dims=tuple(names[: len(shape)])), + }, + coords=coords, + ) + units = extract_units(ds) + + expected = attach_units(strip_units(ds).squeeze(), units) + + result = ds.squeeze() + assert_equal_with_units(result, expected) + + # try squeezing the dimensions separately + names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) 
+ for name in names: + expected = attach_units(strip_units(ds).squeeze(dim=name), units) + result = ds.squeeze(dim=name) + assert_equal_with_units(result, expected) + + @pytest.mark.xfail(reason="ignores units") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_interp(self, unit, error): + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.s, + } + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + new_coords = (np.arange(10) + 0.5) * unit + + if error is not None: + with pytest.raises(error): + ds.interp(x=new_coords) + + return + + expected = attach_units( + strip_units(ds).interp(x=strip_units(new_coords)), extract_units(ds) + ) + result = ds.interp(x=new_coords) + + assert_equal_with_units(result, expected) + + @pytest.mark.xfail(reason="ignores units") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_interp_like(self, unit, error, dtype): + array1 = ( + np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * 
unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) + + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.m, + } + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + other = xr.Dataset( + data_vars={ + "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")), + "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), + }, + coords={ + "x": (np.arange(20) + 0.3) * unit, + "y": (np.arange(10) - 0.2) * unit, + "z": (np.arange(15) + 0.4) * unit, + }, + ) + + if error is not None: + with pytest.raises(error): + ds.interp_like(other) + + return + + expected = attach_units( + strip_units(ds).interp_like(strip_units(other)), extract_units(ds) + ) + result = ds.interp_like(other) + + assert_equal_with_units(result, expected) + + @pytest.mark.xfail( + reason="pint does not implement np.result_type in __array_function__ yet" + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_reindex(self, unit, error): + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.s, + } + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + new_coords = 
(np.arange(10) + 0.5) * unit + + if error is not None: + with pytest.raises(error): + ds.reindex(x=new_coords) + + return + + expected = attach_units( + strip_units(ds).reindex(x=strip_units(new_coords)), extract_units(ds) + ) + result = ds.reindex(x=new_coords) + + assert_equal_with_units(result, expected) + + @pytest.mark.xfail( + reason="pint does not implement np.result_type in __array_function__ yet" + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_reindex_like(self, unit, error, dtype): + array1 = ( + np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) + + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.m, + } + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + other = xr.Dataset( + data_vars={ + "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")), + "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), + }, + coords={ + "x": (np.arange(20) + 0.3) * unit, + "y": (np.arange(10) - 0.2) * unit, + "z": (np.arange(15) + 0.4) * unit, + }, + ) + + if error is not None: + with pytest.raises(error): + ds.reindex_like(other) + + return + + expected = attach_units( + strip_units(ds).reindex_like(strip_units(other)), extract_units(ds) + ) + result = ds.reindex_like(other) + + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize( + "func", + ( + method("diff",
dim="x"), + method("differentiate", coord="x"), + method("integrate", coord="x"), + pytest.param( + method("quantile", q=[0.25, 0.75]), + marks=pytest.mark.xfail( + reason="pint does not implement nanpercentile yet" + ), + ), + pytest.param( + method("reduce", func=np.sum, dim="x"), + marks=pytest.mark.xfail(reason="strips units"), + ), + pytest.param( + method("apply", np.fabs), + marks=pytest.mark.xfail(reason="fabs strips units"), + ), + ), + ids=repr, + ) + def test_computation(self, func, dtype): + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) + x = np.arange(10) * unit_registry.m + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords={"x": x, "y": y, "z": z}, + ) + + units = extract_units(ds) + + expected = attach_units(func(strip_units(ds)), units) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + pytest.param( + method("groupby", "x"), marks=pytest.mark.xfail(reason="strips units") + ), + pytest.param( + method("groupby_bins", "x", bins=4), + marks=pytest.mark.xfail(reason="strips units"), + ), + method("coarsen", x=2), + pytest.param( + method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units") + ), + pytest.param( + method("rolling_exp", x=3), + marks=pytest.mark.xfail(reason="strips units"), + ), + ), + ids=repr, + ) + def test_computation_objects(self, func, dtype): + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(10) * unit_registry.m + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + 
+ ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + }, + coords={"x": x, "y": y, "z": z}, + ) + units = extract_units(ds) + + args = [] if func.name != "groupby" else ["y"] + reduce_func = method("mean", *args) + expected = attach_units(reduce_func(func(strip_units(ds))), units) + result = reduce_func(func(ds)) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="strips units") + def test_resample(self, dtype): + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) + t = pd.date_range("10-09-2010", periods=array1.shape[0], freq="1y") + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("time", "y")), + "b": xr.DataArray(data=array2, dims=("time", "z")), + }, + coords={"time": t, "y": y, "z": z}, + ) + units = extract_units(ds) + + func = method("resample", time="6m") + + expected = attach_units(func(strip_units(ds)).mean(), units) + result = func(ds).mean() + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + pytest.param( + method("assign", c=lambda ds: 10 * ds.b), + marks=pytest.mark.xfail(reason="strips units"), + ), + pytest.param( + method("assign_coords", v=("x", np.arange(10) * unit_registry.s)), + marks=pytest.mark.xfail(reason="strips units"), + ), + pytest.param(method("first")), + pytest.param(method("last")), + pytest.param( + method("quantile", q=[0.25, 0.5, 0.75], dim="x"), + marks=pytest.mark.xfail( + reason="dataset groupby does not implement quantile" + ), + ), + ), + ids=repr, + ) + def test_grouped_operations(self, func, dtype): + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 5 * 
8).reshape(10, 5, 8).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(10) * unit_registry.m + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + }, + coords={"x": x, "y": y, "z": z}, + ) + units = extract_units(ds) + units.update({"c": unit_registry.Pa, "v": unit_registry.s}) + + stripped_kwargs = { + name: strip_units(value) for name, value in func.kwargs.items() + } + expected = attach_units( + func(strip_units(ds).groupby("y"), **stripped_kwargs), units + ) + result = func(ds.groupby("y")) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + method("pipe", lambda ds: ds * 10), + method("assign", d=lambda ds: ds.b * 10), + method("assign_coords", y2=("y", np.arange(5) * unit_registry.mm)), + method("assign_attrs", attr1="value"), + method("rename", x2="x_mm"), + method("rename_vars", c="temperature"), + method("rename_dims", x="offset_x"), + method("swap_dims", {"x": "x2"}), + method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), + method("drop", labels="x"), + method("drop_dims", "z"), + method("set_coords", names="c"), + method("reset_coords", names="x2"), + method("copy"), + ), + ids=repr, + ) + def test_content_manipulation(self, func, dtype): + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) + * unit_registry.m ** 3 + ) + array2 = ( + np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) + * unit_registry.Pa + ) + array3 = np.linspace(0, 10, 10).astype(dtype) * unit_registry.degK + + x = np.arange(10) * unit_registry.m + x2 = x.to(unit_registry.mm) + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + "c": xr.DataArray(data=array3, 
dims="x"), + }, + coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, + ) + units = extract_units(ds) + units.update( + { + "y2": unit_registry.mm, + "x_mm": unit_registry.mm, + "offset_x": unit_registry.m, + "d": unit_registry.Pa, + "temperature": unit_registry.degK, + } + ) + + stripped_kwargs = { + key: strip_units(value) for key, value in func.kwargs.items() + } + expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="blocked by reindex") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, xr.MergeError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, xr.MergeError, id="dimensionless" + ), + pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"), + pytest.param(unit_registry.cm, xr.MergeError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize("variant", ("data", "dims", "coords")) + def test_merge(self, variant, unit, error, dtype): + original_data_unit = unit_registry.m + original_dim_unit = unit_registry.m + original_coord_unit = unit_registry.m + + variants = { + "data": (unit, original_dim_unit, original_coord_unit), + "dims": (original_data_unit, unit, original_coord_unit), + "coords": (original_data_unit, original_dim_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + left_array = np.arange(10).astype(dtype) * original_data_unit + right_array = np.arange(-5, 5).astype(dtype) * data_unit + + left_dim = np.arange(10, 20) * original_dim_unit + right_dim = np.arange(5, 15) * dim_unit + + left_coord = np.arange(-10, 0) * original_coord_unit + right_coord = np.arange(-15, -5) * coord_unit + + left = xr.Dataset( + data_vars={"a": ("x", left_array)}, + coords={"x": left_dim, "y": ("x", left_coord)}, + ) + right = xr.Dataset( + data_vars={"a": ("x", right_array)}, + coords={"x": right_dim, "y": ("x", right_coord)}, + 
) + + units = extract_units(left) + + if error is not None: + with pytest.raises(error): + left.merge(right) + + return + + converted = convert_units(right, units) + expected = attach_units(strip_units(left).merge(strip_units(converted)), units) + result = left.merge(right) + + assert_equal_with_units(expected, result) From db0f13d194845b06fa82f64574d0e78d8449ddbe Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 9 Nov 2019 16:10:22 -0500 Subject: [PATCH 10/15] Dataset.map, GroupBy.map, Resample.map (#3459) * rename dataset.apply to dataset.map, deprecating apply * use apply in deprecation test * adjust docs * add groupby rename, remove depreciation warnings (to pending) * change internal usages * formatting * whatsnew * docs * docs * internal usages * formatting * docstring, see also --- doc/computation.rst | 4 +-- doc/groupby.rst | 15 ++++++----- doc/howdoi.rst | 2 +- doc/quick-overview.rst | 2 +- doc/whats-new.rst | 7 +++++ xarray/core/dataarray.py | 11 +++++--- xarray/core/dataset.py | 34 ++++++++++++++++++++--- xarray/core/groupby.py | 49 +++++++++++++++++++++++++++------- xarray/core/resample.py | 43 ++++++++++++++++++++++++++--- xarray/tests/test_dataarray.py | 36 ++++++++++++------------- xarray/tests/test_dataset.py | 43 ++++++++++++++++------------- xarray/tests/test_groupby.py | 14 +++++----- xarray/tests/test_sparse.py | 2 +- 13 files changed, 186 insertions(+), 76 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index ae5f4bc5c66..d477cb63d72 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -462,13 +462,13 @@ Datasets support most of the same methods found on data arrays: abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or -alternatively you can use :py:meth:`~xarray.Dataset.apply` to apply a function +alternatively you can use :py:meth:`~xarray.Dataset.map` to map a function to each variable in a dataset: .. 
ipython:: python np.sin(ds) - ds.apply(np.sin) + ds.map(np.sin) Datasets also use looping over variables for *broadcasting* in binary arithmetic. You can do arithmetic between any ``DataArray`` and a dataset: diff --git a/doc/groupby.rst b/doc/groupby.rst index 52a27f4f160..f5943703765 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -35,10 +35,11 @@ Let's create a simple example dataset: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 3))}, - coords={'x': [10, 20, 30, 40], - 'letters': ('x', list('abba'))}) - arr = ds['foo'] + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 3))}, + coords={"x": [10, 20, 30, 40], "letters": ("x", list("abba"))}, + ) + arr = ds["foo"] ds If we groupby the name of a variable or coordinate in a dataset (we can also @@ -93,7 +94,7 @@ Apply ~~~~~ To apply a function to each group, you can use the flexible -:py:meth:`~xarray.DatasetGroupBy.apply` method. The resulting objects are automatically +:py:meth:`~xarray.DatasetGroupBy.map` method. The resulting objects are automatically concatenated back together along the group axis: .. ipython:: python @@ -101,7 +102,7 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').apply(standardize) + arr.groupby('letters').map(standardize) GroupBy objects also have a :py:meth:`~xarray.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.DatasetGroupBy.mean` as shortcuts for applying an @@ -202,7 +203,7 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen dims=['ny','nx']) da da.groupby('lon').sum(...) 
- da.groupby('lon').apply(lambda x: x - x.mean(), shortcut=False) + da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 721d1323e73..91644ba2718 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -44,7 +44,7 @@ How do I ... * - convert a possibly irregularly sampled timeseries to a regularly sampled timeseries - :py:meth:`DataArray.resample`, :py:meth:`Dataset.resample` (see :ref:`resampling` for more) * - apply a function on all data variables in a Dataset - - :py:meth:`Dataset.apply` + - :py:meth:`Dataset.map` * - write xarray objects with complex values to a netCDF file - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True`` * - make xarray objects look like other xarray objects diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 7d84199323d..741b3d1a5fe 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -142,7 +142,7 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') labels data.groupby(labels).mean('y') - data.groupby(labels).apply(lambda x: x - x.min()) + data.groupby(labels).map(lambda x: x - x.min()) Plotting -------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d2a4b32a71f..6b3bfb42595 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,13 @@ New Features option for dropping either labels or variables, but using the more specific methods is encouraged. (:pull:`3475`) By `Maximilian Roos `_ +- :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` have been added for + mapping / applying a function over each item in the collection, reflecting the widely used + and least surprising name for this operation. 
+ The existing ``apply`` methods remain for backward compatibility, though using the ``map`` + methods is encouraged. + (:pull:`3459`) + By `Maximilian Roos `_ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) to represent all 'other' dimensions. For example, to move one dimension to the front, use `.transpose('x', ...)`. (:pull:`3421`) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3e4c7903180..5e164f420c8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -920,7 +920,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": Coordinates: * x (x) >> arr.identical(roundtripped) True - See also + See Also -------- DataArray.stack """ @@ -1923,6 +1923,11 @@ def drop( """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged + + See Also + -------- + DataArray.drop_vars + DataArray.drop_sel """ ds = self._to_temp_dataset().drop(labels, dim, errors=errors) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2cadc90334c..dc5a315e72a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3557,6 +3557,11 @@ def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged + + See Also + -------- + Dataset.drop_vars + Dataset.drop_sel """ if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') @@ -4108,14 +4113,14 @@ def reduce( variables, coord_names=coord_names, attrs=attrs, indexes=indexes ) - def apply( + def map( self, func: Callable, keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, ) -> "Dataset": - """Apply a function over the data variables in this dataset. 
+ """Apply a function to each variable in this dataset Parameters ---------- @@ -4135,7 +4140,7 @@ def apply( Returns ------- applied : Dataset - Resulting dataset from applying ``func`` over each data variable. + Resulting dataset from applying ``func`` to each data variable. Examples -------- @@ -4148,7 +4153,7 @@ def apply( Data variables: foo (dim_0, dim_1) float64 -0.3751 -1.951 -1.945 0.2948 0.711 -0.3948 bar (x) int64 -1 2 - >>> ds.apply(np.fabs) + >>> ds.map(np.fabs) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Dimensions without coordinates: dim_0, dim_1, x @@ -4165,6 +4170,27 @@ def apply( attrs = self.attrs if keep_attrs else None return type(self)(variables, attrs=attrs) + def apply( + self, + func: Callable, + keep_attrs: bool = None, + args: Iterable[Any] = (), + **kwargs: Any, + ) -> "Dataset": + """ + Backward compatible implementation of ``map`` + + See Also + -------- + Dataset.map + """ + warnings.warn( + "Dataset.apply may be deprecated in the future. Using Dataset.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, keep_attrs, args, **kwargs) + def assign( self, variables: Mapping[Hashable, Any] = None, **variables_kwargs: Hashable ) -> "Dataset": diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c8906e34737..8ae65d9b9df 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -608,7 +608,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Dataset.swap_dims """ coords_kwargs = either_dict_or_kwargs(coords, coords_kwargs, "assign_coords") - return self.apply(lambda ds: ds.assign_coords(**coords_kwargs)) + return self.map(lambda ds: ds.assign_coords(**coords_kwargs)) def _maybe_reorder(xarray_obj, dim, positions): @@ -655,8 +655,8 @@ def lookup_order(dimension): new_order = sorted(stacked.dims, key=lookup_order) return stacked.transpose(*new_order, transpose_coords=self._restore_coord_dims) - def apply(self, func, shortcut=False, args=(), **kwargs): - """Apply a function over 
each array in the group and concatenate them + def map(self, func, shortcut=False, args=(), **kwargs): + """Apply a function to each array in the group and concatenate them together into a new array. `func` is called like `func(ar, *args, **kwargs)` for each array `ar` @@ -702,6 +702,21 @@ def apply(self, func, shortcut=False, args=(), **kwargs): applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped) return self._combine(applied, shortcut=shortcut) + def apply(self, func, shortcut=False, args=(), **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DataArrayGroupBy.map + """ + warnings.warn( + "GroupBy.apply may be deprecated in the future. Using GroupBy.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, shortcut=shortcut, args=args, **kwargs) + def _combine(self, applied, restore_coord_dims=False, shortcut=False): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) @@ -765,7 +780,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): if dim is None: dim = self._group_dim - out = self.apply( + out = self.map( self._obj.__class__.quantile, shortcut=False, q=q, @@ -820,7 +835,7 @@ def reduce_array(ar): check_reduce_dims(dim, self.dims) - return self.apply(reduce_array, shortcut=shortcut) + return self.map(reduce_array, shortcut=shortcut) ops.inject_reduce_methods(DataArrayGroupBy) @@ -828,8 +843,8 @@ def reduce_array(ar): class DatasetGroupBy(GroupBy, ImplementsDatasetReduce): - def apply(self, func, args=(), shortcut=None, **kwargs): - """Apply a function over each Dataset in the group and concatenate them + def map(self, func, args=(), shortcut=None, **kwargs): + """Apply a function to each Dataset in the group and concatenate them together into a new Dataset. 
`func` is called like `func(ds, *args, **kwargs)` for each dataset `ds` @@ -862,6 +877,22 @@ def apply(self, func, args=(), shortcut=None, **kwargs): applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped()) return self._combine(applied) + def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DatasetGroupBy.map + """ + + warnings.warn( + "GroupBy.apply may be deprecated in the future. Using GroupBy.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, shortcut=shortcut, args=args, **kwargs) + def _combine(self, applied): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) @@ -914,7 +945,7 @@ def reduce_dataset(ds): check_reduce_dims(dim, self.dims) - return self.apply(reduce_dataset) + return self.map(reduce_dataset) def assign(self, **kwargs): """Assign data variables by group. @@ -923,7 +954,7 @@ def assign(self, **kwargs): -------- Dataset.assign """ - return self.apply(lambda ds: ds.assign(**kwargs)) + return self.map(lambda ds: ds.assign(**kwargs)) ops.inject_reduce_methods(DatasetGroupBy) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 2cb1bd55e19..fb388490d06 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,3 +1,5 @@ +import warnings + from . import ops from .groupby import DataArrayGroupBy, DatasetGroupBy @@ -173,8 +175,8 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) - def apply(self, func, shortcut=False, args=(), **kwargs): - """Apply a function over each array in the group and concatenate them + def map(self, func, shortcut=False, args=(), **kwargs): + """Apply a function to each array in the group and concatenate them together into a new array. 
`func` is called like `func(ar, *args, **kwargs)` for each array `ar` @@ -212,7 +214,9 @@ def apply(self, func, shortcut=False, args=(), **kwargs): applied : DataArray or DataArray The result of splitting, applying and combining this array. """ - combined = super().apply(func, shortcut=shortcut, args=args, **kwargs) + # TODO: the argument order for Resample doesn't match that for its parent, + # GroupBy + combined = super().map(func, shortcut=shortcut, args=args, **kwargs) # If the aggregation function didn't drop the original resampling # dimension, then we need to do so before we can rename the proxy @@ -225,6 +229,21 @@ def apply(self, func, shortcut=False, args=(), **kwargs): return combined + def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DataArrayResample.map + """ + warnings.warn( + "Resample.apply may be deprecated in the future. Using Resample.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func=func, shortcut=shortcut, args=args, **kwargs) + ops.inject_reduce_methods(DataArrayResample) ops.inject_binary_ops(DataArrayResample) @@ -247,7 +266,7 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) - def apply(self, func, args=(), shortcut=None, **kwargs): + def map(self, func, args=(), shortcut=None, **kwargs): """Apply a function over each Dataset in the groups generated for resampling and concatenate them together into a new Dataset. @@ -282,6 +301,22 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return combined.rename({self._resample_dim: self._dim}) + def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DatasetResample.map + """ + + warnings.warn( + "Resample.apply may be deprecated in the future.
Using Resample.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func=func, shortcut=shortcut, args=args, **kwargs) + def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index acfe684d220..42fae2c9dd4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2417,7 +2417,7 @@ def test_groupby_properties(self): assert_array_equal(expected_groups[key], grouped.groups[key]) assert 3 == len(grouped) - def test_groupby_apply_identity(self): + def test_groupby_map_identity(self): expected = self.make_groupby_example_array() idx = expected.coords["y"] @@ -2428,7 +2428,7 @@ def identity(x): for shortcut in [False, True]: for squeeze in [False, True]: grouped = expected.groupby(g, squeeze=squeeze) - actual = grouped.apply(identity, shortcut=shortcut) + actual = grouped.map(identity, shortcut=shortcut) assert_identical(expected, actual) def test_groupby_sum(self): @@ -2461,7 +2461,7 @@ def test_groupby_sum(self): [["a", "b", "c"]], ["abc"], ) - actual = array["y"].groupby("abc").apply(np.sum) + actual = array["y"].groupby("abc").map(np.sum) assert_allclose(expected, actual) actual = array["y"].groupby("abc").sum(...) 
assert_allclose(expected, actual) @@ -2532,7 +2532,7 @@ def test_groupby_reduce_attrs(self): expected.attrs["foo"] = "bar" assert_identical(expected, actual) - def test_groupby_apply_center(self): + def test_groupby_map_center(self): def center(x): return x - np.mean(x) @@ -2545,16 +2545,16 @@ def center(x): ) expected_ds["foo"] = (["x", "y"], exp_data) expected_centered = expected_ds["foo"] - assert_allclose(expected_centered, grouped.apply(center)) + assert_allclose(expected_centered, grouped.map(center)) - def test_groupby_apply_ndarray(self): + def test_groupby_map_ndarray(self): # regression test for #326 array = self.make_groupby_example_array() grouped = array.groupby("abc") - actual = grouped.apply(np.asarray) + actual = grouped.map(np.asarray) assert_equal(array, actual) - def test_groupby_apply_changes_metadata(self): + def test_groupby_map_changes_metadata(self): def change_metadata(x): x.coords["x"] = x.coords["x"] * 2 x.attrs["fruit"] = "lemon" @@ -2562,7 +2562,7 @@ def change_metadata(x): array = self.make_groupby_example_array() grouped = array.groupby("abc") - actual = grouped.apply(change_metadata) + actual = grouped.map(change_metadata) expected = array.copy() expected = change_metadata(expected) assert_equal(expected, actual) @@ -2631,7 +2631,7 @@ def test_groupby_restore_dim_order(self): ("a", ("a", "y")), ("b", ("x", "b")), ]: - result = array.groupby(by).apply(lambda x: x.squeeze()) + result = array.groupby(by).map(lambda x: x.squeeze()) assert result.dims == expected_dims def test_groupby_restore_coord_dims(self): @@ -2651,13 +2651,13 @@ def test_groupby_restore_coord_dims(self): ("a", ("a", "y")), ("b", ("x", "b")), ]: - result = array.groupby(by, restore_coord_dims=True).apply( + result = array.groupby(by, restore_coord_dims=True).map( lambda x: x.squeeze() )["c"] assert result.dims == expected_dims with pytest.warns(FutureWarning): - array.groupby("x").apply(lambda x: x.squeeze()) + array.groupby("x").map(lambda x: x.squeeze()) def 
test_groupby_first_and_last(self): array = DataArray([1, 2, 3, 4, 5], dims="x") @@ -2699,9 +2699,9 @@ def test_groupby_multidim(self): actual_sum = array.groupby(dim).sum(...) assert_identical(expected_sum, actual_sum) - def test_groupby_multidim_apply(self): + def test_groupby_multidim_map(self): array = self.make_groupby_multidim_example_array() - actual = array.groupby("lon").apply(lambda x: x - x.mean()) + actual = array.groupby("lon").map(lambda x: x - x.mean()) expected = DataArray( [[[-2.5, -6.0], [-5.0, -8.5]], [[2.5, 3.0], [8.0, 8.5]]], coords=array.coords, @@ -2722,7 +2722,7 @@ def test_groupby_bins(self): ) # the problem with this is that it overwrites the dimensions of array! # actual = array.groupby('dim_0', bins=bins).sum() - actual = array.groupby_bins("dim_0", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("dim_0", bins).map(lambda x: x.sum()) assert_identical(expected, actual) # make sure original array dims are unchanged assert len(array.dim_0) == 4 @@ -2744,12 +2744,12 @@ def test_groupby_bins_multidim(self): bins = [0, 15, 20] bin_coords = pd.cut(array["lat"].values.flat, bins).categories expected = DataArray([16, 40], dims="lat_bins", coords={"lat_bins": bin_coords}) - actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("lat", bins).map(lambda x: x.sum()) assert_identical(expected, actual) # modify the array coordinates to be non-monotonic after unstacking array["lat"].data = np.array([[10.0, 20.0], [20.0, 10.0]]) expected = DataArray([28, 28], dims="lat_bins", coords={"lat_bins": bin_coords}) - actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("lat", bins).map(lambda x: x.sum()) assert_identical(expected, actual) def test_groupby_bins_sort(self): @@ -2784,7 +2784,7 @@ def func(arg1, arg2, arg3=0.0): times = pd.date_range("2000", periods=3, freq="D") da = xr.DataArray([1.0, 1.0, 1.0], coords=[times], dims=["time"]) expected = 
xr.DataArray([3.0, 3.0, 3.0], coords=[times], dims=["time"]) - actual = da.resample(time="D").apply(func, args=(1.0,), arg3=1.0) + actual = da.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(actual, expected) def test_resample_first(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 50e78c9f685..d001c43da94 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3310,17 +3310,17 @@ def identity(x): return x for k in ["x", "c", "y"]: - actual = data.groupby(k, squeeze=False).apply(identity) + actual = data.groupby(k, squeeze=False).map(identity) assert_equal(data, actual) def test_groupby_returns_new_type(self): data = Dataset({"z": (["x", "y"], np.random.randn(3, 5))}) - actual = data.groupby("x").apply(lambda ds: ds["z"]) + actual = data.groupby("x").map(lambda ds: ds["z"]) expected = data["z"] assert_identical(expected, actual) - actual = data["z"].groupby("x").apply(lambda x: x.to_dataset()) + actual = data["z"].groupby("x").map(lambda x: x.to_dataset()) expected = data assert_identical(expected, actual) @@ -3639,7 +3639,7 @@ def func(arg1, arg2, arg3=0.0): times = pd.date_range("2000", freq="D", periods=3) ds = xr.Dataset({"foo": ("time", [1.0, 1.0, 1.0]), "time": times}) expected = xr.Dataset({"foo": ("time", [3.0, 3.0, 3.0]), "time": times}) - actual = ds.resample(time="D").apply(func, args=(1.0,), arg3=1.0) + actual = ds.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(expected, actual) def test_to_array(self): @@ -4515,31 +4515,36 @@ def test_count(self): actual = ds.count() assert_identical(expected, actual) - def test_apply(self): + def test_map(self): data = create_test_data() data.attrs["foo"] = "bar" - assert_identical(data.apply(np.mean), data.mean()) + assert_identical(data.map(np.mean), data.mean()) expected = data.mean(keep_attrs=True) - actual = data.apply(lambda x: x.mean(keep_attrs=True), keep_attrs=True) + actual = data.map(lambda x: 
x.mean(keep_attrs=True), keep_attrs=True) assert_identical(expected, actual) - assert_identical( - data.apply(lambda x: x, keep_attrs=True), data.drop_vars("time") - ) + assert_identical(data.map(lambda x: x, keep_attrs=True), data.drop_vars("time")) def scale(x, multiple=1): return multiple * x - actual = data.apply(scale, multiple=2) + actual = data.map(scale, multiple=2) assert_equal(actual["var1"], 2 * data["var1"]) assert_identical(actual["numbers"], data["numbers"]) - actual = data.apply(np.asarray) + actual = data.map(np.asarray) expected = data.drop_vars("time") # time is not used on a data var assert_equal(expected, actual) + def test_apply_pending_deprecated_map(self): + data = create_test_data() + data.attrs["foo"] = "bar" + + with pytest.warns(PendingDeprecationWarning): + assert_identical(data.apply(np.mean), data.mean()) + def make_example_math_dataset(self): variables = { "bar": ("x", np.arange(100, 400, 100)), @@ -4566,15 +4571,15 @@ def test_dataset_number_math(self): def test_unary_ops(self): ds = self.make_example_math_dataset() - assert_identical(ds.apply(abs), abs(ds)) - assert_identical(ds.apply(lambda x: x + 4), ds + 4) + assert_identical(ds.map(abs), abs(ds)) + assert_identical(ds.map(lambda x: x + 4), ds + 4) for func in [ lambda x: x.isnull(), lambda x: x.round(), lambda x: x.astype(int), ]: - assert_identical(ds.apply(func), func(ds)) + assert_identical(ds.map(func), func(ds)) assert_identical(ds.isnull(), ~ds.notnull()) @@ -4587,7 +4592,7 @@ def test_unary_ops(self): def test_dataset_array_math(self): ds = self.make_example_math_dataset() - expected = ds.apply(lambda x: x - ds["foo"]) + expected = ds.map(lambda x: x - ds["foo"]) assert_identical(expected, ds - ds["foo"]) assert_identical(expected, -ds["foo"] + ds) assert_identical(expected, ds - ds["foo"].variable) @@ -4596,7 +4601,7 @@ def test_dataset_array_math(self): actual -= ds["foo"] assert_identical(expected, actual) - expected = ds.apply(lambda x: x + ds["bar"]) + expected = 
ds.map(lambda x: x + ds["bar"]) assert_identical(expected, ds + ds["bar"]) actual = ds.copy(deep=True) actual += ds["bar"] @@ -4612,7 +4617,7 @@ def test_dataset_dataset_math(self): assert_identical(ds, ds + 0 * ds) assert_identical(ds, ds + {"foo": 0, "bar": 0}) - expected = ds.apply(lambda x: 2 * x) + expected = ds.map(lambda x: 2 * x) assert_identical(expected, 2 * ds) assert_identical(expected, ds + ds) assert_identical(expected, ds + ds.data_vars) @@ -4709,7 +4714,7 @@ def test_dataset_transpose(self): assert_identical(expected, actual) actual = ds.transpose("x", "y") - expected = ds.apply(lambda x: x.transpose("x", "y", transpose_coords=True)) + expected = ds.map(lambda x: x.transpose("x", "y", transpose_coords=True)) assert_identical(expected, actual) ds = create_test_data() diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index e2216547ac8..581affa3471 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -45,14 +45,14 @@ def test_groupby_dims_property(dataset): assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims -def test_multi_index_groupby_apply(dataset): +def test_multi_index_groupby_map(dataset): # regression test for GH873 ds = dataset.isel(z=1, drop=True)[["foo"]] expected = 2 * ds actual = ( ds.stack(space=["x", "y"]) .groupby("space") - .apply(lambda x: 2 * x) + .map(lambda x: 2 * x) .unstack("space") ) assert_equal(expected, actual) @@ -107,23 +107,23 @@ def test_groupby_input_mutation(): assert_identical(array, array_copy) # should not modify inputs -def test_da_groupby_apply_func_args(): +def test_da_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 array = xr.DataArray([1, 1, 1], [("x", [1, 2, 3])]) expected = xr.DataArray([3, 3, 3], [("x", [1, 2, 3])]) - actual = array.groupby("x").apply(func, args=(1,), arg3=1) + actual = array.groupby("x").map(func, args=(1,), arg3=1) assert_identical(expected, actual) -def test_ds_groupby_apply_func_args(): +def 
test_ds_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) expected = xr.Dataset({"foo": ("x", [3, 3, 3])}, {"x": [1, 2, 3]}) - actual = dataset.groupby("x").apply(func, args=(1,), arg3=1) + actual = dataset.groupby("x").map(func, args=(1,), arg3=1) assert_identical(expected, actual) @@ -285,7 +285,7 @@ def test_groupby_drops_nans(): expected.variable.values[0, 0, :] = np.nan expected.variable.values[-1, -1, :] = np.nan expected.variable.values[3, 0, :] = np.nan - actual = grouped.apply(lambda x: x).transpose(*ds.variable.dims) + actual = grouped.map(lambda x: x).transpose(*ds.variable.dims) assert_identical(actual, expected) # reduction along grouped dimension diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 8e2d4b8e064..a31da162487 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -339,7 +339,7 @@ def test_dataarray_property(prop): (do("copy"), True), (do("count"), False), (do("diff", "x"), True), - (do("drop", "x"), True), + (do("drop_vars", "x"), True), (do("expand_dims", {"z": 2}, axis=2), True), (do("get_axis_num", "x"), False), (do("get_index", "x"), False), From 897b5d153a9fc12c072f8f6d8fa07f8deec679d3 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 9 Nov 2019 16:30:32 -0500 Subject: [PATCH 11/15] remove syntax warning (#3505) --- xarray/plot/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index ca68f617144..5c754c3f49b 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -288,7 +288,7 @@ def line( ) # The allargs dict passed to _easy_facetgrid above contains args - if args is (): + if args == (): args = kwargs.pop("args", ()) else: assert "args" not in kwargs From 7ace0d580c09894e35f111f4adc04a3c845b01d5 Mon Sep 17 00:00:00 2001 From: Maximilian Roos 
<5635139+max-sixty@users.noreply.github.com> Date: Sat, 9 Nov 2019 17:28:52 -0500 Subject: [PATCH 12/15] add drop_sel, drop_vars, map to api.rst (#3506) --- doc/api.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index b176e3d5e3f..d2309f28226 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -94,7 +94,7 @@ Dataset contents Dataset.rename_dims Dataset.swap_dims Dataset.expand_dims - Dataset.drop + Dataset.drop_vars Dataset.drop_dims Dataset.set_coords Dataset.reset_coords @@ -118,6 +118,7 @@ Indexing Dataset.loc Dataset.isel Dataset.sel + Dataset.drop_sel Dataset.head Dataset.tail Dataset.thin @@ -154,7 +155,7 @@ Computation .. autosummary:: :toctree: generated/ - Dataset.apply + Dataset.map Dataset.reduce Dataset.groupby Dataset.groupby_bins @@ -263,7 +264,7 @@ DataArray contents DataArray.rename DataArray.swap_dims DataArray.expand_dims - DataArray.drop + DataArray.drop_vars DataArray.reset_coords DataArray.copy @@ -283,6 +284,7 @@ Indexing DataArray.loc DataArray.isel DataArray.sel + DataArray.drop_sel DataArray.head DataArray.tail DataArray.thin @@ -542,10 +544,10 @@ GroupBy objects :toctree: generated/ core.groupby.DataArrayGroupBy - core.groupby.DataArrayGroupBy.apply + core.groupby.DataArrayGroupBy.map core.groupby.DataArrayGroupBy.reduce core.groupby.DatasetGroupBy - core.groupby.DatasetGroupBy.apply + core.groupby.DatasetGroupBy.map core.groupby.DatasetGroupBy.reduce Rolling objects @@ -566,7 +568,7 @@ Resample objects ================ Resample objects also implement the GroupBy interface -(methods like ``apply()``, ``reduce()``, ``mean()``, ``sum()``, etc.). +(methods like ``map()``, ``reduce()``, ``mean()``, ``sum()``, etc.). .. 
autosummary:: :toctree: generated/ From f14edf326d41ce96be5d8fd42d56e80e3faf5ce7 Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Sat, 9 Nov 2019 17:19:16 -0800 Subject: [PATCH 13/15] DOC: update bottleneck repo url (#3507) --- doc/computation.rst | 2 +- doc/dask.rst | 2 +- doc/installing.rst | 2 +- doc/whats-new.rst | 2 +- setup.cfg | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index d477cb63d72..663c546be20 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -183,7 +183,7 @@ a value when aggregating: Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects. -.. _bottleneck: https://github.com/kwgoodman/bottleneck/ +.. _bottleneck: https://github.com/pydata/bottleneck/ We can also manually iterate through ``Rolling`` objects: diff --git a/doc/dask.rst b/doc/dask.rst index 5bdbf779463..11f378aa376 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -292,7 +292,7 @@ For the best performance when using Dask's multi-threaded scheduler, wrap a function that already releases the global interpreter lock, which fortunately already includes most NumPy and Scipy functions. Here we show an example using NumPy operations and a fast function from -`bottleneck `__, which +`bottleneck `__, which we use to calculate `Spearman's rank-correlation coefficient `__: .. 
code-block:: python diff --git a/doc/installing.rst b/doc/installing.rst index 0c5e8916ca3..219cf109efe 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -43,7 +43,7 @@ For accelerating xarray - `scipy `__: necessary to enable the interpolation features for xarray objects -- `bottleneck `__: speeds up +- `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor - `numbagg `_: for exponential rolling window operations diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6b3bfb42595..d9c85a19ce6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3736,7 +3736,7 @@ Breaking changes warnings: methods and attributes that were deprecated in xray v0.3 or earlier (e.g., ``dimensions``, ``attributes```) have gone away. -.. _bottleneck: https://github.com/kwgoodman/bottleneck +.. _bottleneck: https://github.com/pydata/bottleneck Enhancements ~~~~~~~~~~~~ diff --git a/setup.cfg b/setup.cfg index fec2ca6bbe4..21158e3b0ee 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [tool:pytest] python_files=test_*.py testpaths=xarray/tests properties -# Fixed upstream in https://github.com/kwgoodman/bottleneck/pull/199 +# Fixed upstream in https://github.com/pydata/bottleneck/pull/199 filterwarnings = ignore:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning env = From 4e9240a2087ffbf119919e1ac98046bbf164f94d Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 11 Nov 2019 00:41:51 +0100 Subject: [PATCH 14/15] add missing pint integration tests (#3508) * add tests for broadcast_like * add tests for DataArray head / tail / thin * update whats-new.rst --- doc/whats-new.rst | 2 +- xarray/tests/test_units.py | 107 +++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d9c85a19ce6..96f0ba9a4a6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -111,7 +111,7 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Added 
integration tests against `pint `_. - (:pull:`3238`, :pull:`3447`) by `Justus Magin `_. + (:pull:`3238`, :pull:`3447`, :pull:`3508`) by `Justus Magin `_. .. note:: diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 8eed1f0dbe3..fd9e9b039ac 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1045,6 +1045,36 @@ def test_comparisons(self, func, variation, unit, dtype): assert expected == result + @pytest.mark.xfail(reason="blocked by `where`") + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param(unit_registry.cm, id="compatible_unit"), + pytest.param(unit_registry.m, id="identical_unit"), + ), + ) + def test_broadcast_like(self, unit, dtype): + array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa + array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa + + x1 = np.arange(2) * unit_registry.m + x2 = np.arange(2) * unit + y1 = np.array([0]) * unit_registry.m + y2 = np.arange(3) * unit + + arr1 = xr.DataArray(data=array1, coords={"x": x1, "y": y1}, dims=("x", "y")) + arr2 = xr.DataArray(data=array2, coords={"x": x2, "y": y2}, dims=("x", "y")) + + expected = attach_units( + strip_units(arr1).broadcast_like(strip_units(arr2)), extract_units(arr1) + ) + result = arr1.broadcast_like(arr2) + + assert_equal_with_units(expected, result) + @pytest.mark.parametrize( "unit", ( @@ -1303,6 +1333,49 @@ def test_squeeze(self, shape, dtype): np.squeeze(array, axis=index), data_array.squeeze(dim=name) ) + @pytest.mark.xfail( + reason="indexes strip units and head / tail / thin only support integers" + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, 
DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + (method("head", x=7, y=3), method("tail", x=7, y=3), method("thin", x=7, y=3)), + ids=repr, + ) + def test_head_tail_thin(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + } + + arr = xr.DataArray(data=array, coords=coords, dims=("x", "y")) + + kwargs = {name: value * unit for name, value in func.kwargs.items()} + + if error is not None: + with pytest.raises(error): + func(arr, **kwargs) + + return + + expected = attach_units(func(strip_units(arr)), extract_units(arr)) + result = func(arr, **kwargs) + + assert_equal_with_units(expected, result) + @pytest.mark.parametrize( "unit,error", ( @@ -2472,6 +2545,40 @@ def test_comparisons(self, func, variation, unit, dtype): assert expected == result + @pytest.mark.xfail(reason="blocked by `where`") + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param(unit_registry.cm, id="compatible_unit"), + pytest.param(unit_registry.m, id="identical_unit"), + ), + ) + def test_broadcast_like(self, unit, dtype): + array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa + array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa + + x1 = np.arange(2) * unit_registry.m + x2 = np.arange(2) * unit + y1 = np.array([0]) * unit_registry.m + y2 = np.arange(3) * unit + + ds1 = xr.Dataset( + data_vars={"a": (("x", "y"), array1)}, coords={"x": x1, "y": y1} + ) + ds2 = xr.Dataset( + data_vars={"a": (("x", "y"), array2)}, coords={"x": x2, "y": y2} + ) + + expected = attach_units( + 
strip_units(ds1).broadcast_like(strip_units(ds2)), extract_units(ds1) + ) + result = ds1.broadcast_like(ds2) + + assert_equal_with_units(expected, result) + @pytest.mark.parametrize( "unit", ( From b74f80ca2df4920f711f9fe5762458c53ce3c2c6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 11 Nov 2019 23:31:38 -0500 Subject: [PATCH 15/15] format indexing.rst code with black (#3511) --- doc/indexing.rst | 138 ++++++++++++++++++++++++++++------------------- 1 file changed, 82 insertions(+), 56 deletions(-) diff --git a/doc/indexing.rst b/doc/indexing.rst index ace960689a8..e8482ac66b3 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -209,12 +209,16 @@ simultaneously, returning a new dataset: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) - ds = da.to_dataset(name='foo') + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) + ds = da.to_dataset(name="foo") ds.isel(space=[0], time=[0]) - ds.sel(time='2000-01-01') + ds.sel(time="2000-01-01") Positional indexing on a dataset is not supported because the ordering of dimensions in a dataset is somewhat ambiguous (it can vary between different @@ -222,7 +226,6 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python - ds[dict(space=[0], time=[0])] ds.loc[dict(time='2000-01-01')] @@ -248,7 +251,6 @@ Any variables with these dimensions are also dropped: ds.drop_dims('time') - .. _masking with where: Masking with ``where`` @@ -326,8 +328,12 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. 
ipython:: python - da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'], - coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']}) + + da = xr.DataArray( + np.arange(12).reshape((3, 4)), + dims=["x", "y"], + coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, + ) da da[[0, 1], [1, 1]] @@ -410,43 +416,56 @@ can use indexing with ``.loc`` : .. ipython:: python - ds = xr.tutorial.open_dataset('air_temperature') + ds = xr.tutorial.open_dataset("air_temperature") - #add an empty 2D dataarray - ds['empty']= xr.full_like(ds.air.mean('time'),fill_value=0) + # add an empty 2D dataarray + ds["empty"] = xr.full_like(ds.air.mean("time"), fill_value=0) - #modify one grid point using loc() - ds['empty'].loc[dict(lon=260, lat=30)] = 100 + # modify one grid point using loc() + ds["empty"].loc[dict(lon=260, lat=30)] = 100 - #modify a 2D region using loc() - lc = ds.coords['lon'] - la = ds.coords['lat'] - ds['empty'].loc[dict(lon=lc[(lc>220)&(lc<260)], lat=la[(la>20)&(la<60)])] = 100 + # modify a 2D region using loc() + lc = ds.coords["lon"] + la = ds.coords["lat"] + ds["empty"].loc[ + dict(lon=lc[(lc > 220) & (lc < 260)], lat=la[(la > 20) & (la < 60)]) + ] = 100 or :py:meth:`~xarray.where`: .. ipython:: python - #modify one grid point using xr.where() - ds['empty'] = xr.where((ds.coords['lat']==20)&(ds.coords['lon']==260), 100, ds['empty']) + # modify one grid point using xr.where() + ds["empty"] = xr.where( + (ds.coords["lat"] == 20) & (ds.coords["lon"] == 260), 100, ds["empty"] + ) + + # or modify a 2D region using xr.where() + mask = ( + (ds.coords["lat"] > 20) + & (ds.coords["lat"] < 60) + & (ds.coords["lon"] > 220) + & (ds.coords["lon"] < 260) + ) + ds["empty"] = xr.where(mask, 100, ds["empty"]) - #or modify a 2D region using xr.where() - mask = (ds.coords['lat']>20)&(ds.coords['lat']<60)&(ds.coords['lon']>220)&(ds.coords['lon']<260) - ds['empty'] = xr.where(mask, 100, ds['empty']) Vectorized indexing can also be used to assign values to xarray object. .. 
ipython:: python - da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'], - coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']}) + da = xr.DataArray( + np.arange(12).reshape((3, 4)), + dims=["x", "y"], + coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, + ) da da[0] = -1 # assignment with broadcasting da - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] = -2 # assign -2 to (ix, iy) = (0, 0) and (1, 1) da @@ -508,10 +527,10 @@ flexible indexing. The following is an example of the pointwise indexing: .. ipython:: python - da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y']) + da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"]) da - da.isel(x=xr.DataArray([0, 1, 6], dims='z'), - y=xr.DataArray([0, 1, 0], dims='z')) + da.isel(x=xr.DataArray([0, 1, 6], dims="z"), y=xr.DataArray([0, 1, 0], dims="z")) + where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected and mapped along a new dimension ``z``. @@ -521,23 +540,27 @@ you can supply a :py:class:`~xarray.DataArray` with a coordinate, .. ipython:: python - da.isel(x=xr.DataArray([0, 1, 6], dims='z', - coords={'z': ['a', 'b', 'c']}), - y=xr.DataArray([0, 1, 0], dims='z')) - + da.isel( + x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}), + y=xr.DataArray([0, 1, 0], dims="z"), + ) + Analogously, label-based pointwise-indexing is also possible by the ``.sel`` method: .. 
ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) - times = xr.DataArray(pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']), - dims='new_time') - da.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']), - time=times) - + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) + times = xr.DataArray( + pd.to_datetime(["2000-01-03", "2000-01-02", "2000-01-01"]), dims="new_time" + ) + da.sel(space=xr.DataArray(["IA", "IL", "IN"], dims=["new_time"]), time=times) .. _align and reindex: @@ -635,12 +658,16 @@ through the :py:attr:`~xarray.DataArray.indexes` attribute. .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da da.indexes - da.indexes['time'] + da.indexes["time"] Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension, falling back to a default :py:class:`pandas.RangeIndex` if it has no coordinate @@ -694,32 +721,31 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list('abc'), [0, 1]], - names=('one', 'two')) - mda = xr.DataArray(np.random.rand(6, 3), - [('x', midx), ('y', range(3))]) - mda - mda.sel(x=(list('ab'), [0])) + + midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) + mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) + mda + mda.sel(x=(list("ab"), [0])) You can also select multiple elements by providing a list of labels or tuples or a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[('a', 0), ('b', 1)]) Additionally, xarray supports dictionaries: .. 
ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={'one': 'a', 'two': 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. ipython:: python - mda.sel(one='a', two=0) + mda.sel(one='a', two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -731,7 +757,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{'one': 'a'}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for