From 0e3089c12ddc32eeed26f5c1bfebb36447146659 Mon Sep 17 00:00:00 2001 From: Spencer Jones <41342785+cspencerjones@users.noreply.github.com> Date: Fri, 23 Aug 2019 10:00:39 -0700 Subject: [PATCH 01/19] Updater to testing environment name (#3253) The testing environment name has been updated to `xarray-tests` in the package and we should do this in the documentation as well. --- doc/contributing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index 429c282a95f..9017c3dde7c 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -152,10 +152,10 @@ We'll now kick off a two-step process: # Create and activate the build environment conda env create -f ci/requirements/py36.yml - conda activate test_env + conda activate xarray-tests # or with older versions of Anaconda: - source activate test_env + source activate xarray-tests # Build and install xarray pip install -e . From 5c6aebccf4eeedbe9de186836a0913f00ea157db Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Date: Wed, 4 Sep 2019 23:22:23 -0500 Subject: [PATCH 02/19] Add head, tail and thin methods (#3278) * Add head, tail and thin methods * Update api and whats-new * Fix pep8 issues * Fix typo * Tests for DataArray --- doc/api.rst | 6 +++ doc/whats-new.rst | 5 +- xarray/core/dataarray.py | 49 ++++++++++++++++++++ xarray/core/dataset.py | 84 ++++++++++++++++++++++++++++++++++ xarray/tests/test_dataarray.py | 13 ++++++ xarray/tests/test_dataset.py | 32 +++++++++++++ 6 files changed, 188 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 872e7786e1b..fb6e037a4f2 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -117,6 +117,9 @@ Indexing Dataset.loc Dataset.isel Dataset.sel + Dataset.head + Dataset.tail + Dataset.thin Dataset.squeeze Dataset.interp Dataset.interp_like @@ -279,6 +282,9 @@ Indexing DataArray.loc DataArray.isel DataArray.sel + Dataset.head + Dataset.tail + Dataset.thin DataArray.squeeze DataArray.interp DataArray.interp_like diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8796c79da4c..1e5855df51f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -87,6 +87,9 @@ New functions/methods Currently only :py:meth:`Dataset.plot.scatter` is implemented. By `Yohai Bar Sinai `_ and `Deepak Cherian `_ +- Added `head`, `tail` and `thin` methods to `Dataset` and `DataArray`. (:issue:`319`) + By `Gerardo Rivera `_. + Enhancements ~~~~~~~~~~~~ @@ -102,7 +105,7 @@ Enhancements - Added the ability to initialize an empty or full DataArray with a single value. (:issue:`277`) - By `Gerardo Rivera `_. + By `Gerardo Rivera `_. - :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e5d53b1943a..8660fa952b1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1040,6 +1040,55 @@ def sel( ) return self._from_temp_dataset(ds) + def head( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by the the first `n` + values along the specified dimension(s). 
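+
+        For example, ``da.head(x=5)`` would return the first five values along
+        a dimension named ``x`` (the names ``da`` and ``x`` are illustrative).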
+ + See Also + -------- + Dataset.head + DataArray.tail + DataArray.thin + """ + + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") + ds = self._to_temp_dataset().head(indexers=indexers) + return self._from_temp_dataset(ds) + + def tail( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by the the last `n` + values along the specified dimension(s). + + See Also + -------- + Dataset.tail + DataArray.head + DataArray.thin + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + ds = self._to_temp_dataset().tail(indexers=indexers) + return self._from_temp_dataset(ds) + + def thin( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by each `n` value + along the specified dimension(s). + + See Also + -------- + Dataset.thin + DataArray.head + DataArray.tail + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + ds = self._to_temp_dataset().thin(indexers=indexers) + return self._from_temp_dataset(ds) + def broadcast_like( self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None ) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f3ad4650b38..1476c1ba646 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2008,6 +2008,90 @@ def sel( result = self.isel(indexers=pos_indexers, drop=drop) return result._overwrite_indexes(new_indexes) + def head( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the first `n` values of each array + for the specified dimension(s). + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. + **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + + See Also + -------- + Dataset.tail + Dataset.thin + DataArray.head + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") + indexers = {k: slice(val) for k, val in indexers.items()} + return self.isel(indexers) + + def tail( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the last `n` values of each array + for the specified dimension(s). + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. + **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + + See Also + -------- + Dataset.head + Dataset.thin + DataArray.tail + """ + + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + indexers = { + k: slice(-val, None) if val != 0 else slice(val) + for k, val in indexers.items() + } + return self.isel(indexers) + + def thin( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with each array indexed along every `n`th + value for the specified dimension(s) + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. 
+ **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + + See Also + -------- + Dataset.head + Dataset.tail + DataArray.thin + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + if 0 in indexers.values(): + raise ValueError("step cannot be zero") + indexers = {k: slice(None, None, val) for k, val in indexers.items()} + return self.isel(indexers) + def broadcast_like( self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None ) -> "Dataset": diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2fc86d777aa..27e6ab92f71 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1002,6 +1002,19 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + def test_head(self): + assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0)) + + def test_tail(self): + assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0)) + + def test_thin(self): + assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5)) + with raises_regex(ValueError, "cannot be zero"): + self.dv.thin(time=0) + def test_loc(self): self.ds["x"] = ("x", np.array(list("abcdefghij"))) da = self.ds["foo"] diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3953e6c4146..d9f0284969e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1411,6 +1411,38 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + def test_head(self): + data = create_test_data() + + expected = data.isel(time=slice(5), dim2=slice(6)) + actual = data.head(time=5, dim2=6) + assert_equal(expected, actual) + + expected = data.isel(time=slice(0)) + actual = data.head(time=0) + assert_equal(expected, actual) + + def test_tail(self): + data = create_test_data() + + expected = data.isel(time=slice(-5, None), dim2=slice(-6, None)) + actual = data.tail(time=5, dim2=6) + assert_equal(expected, actual) + + expected = data.isel(dim1=slice(0)) + actual = data.tail(dim1=0) + assert_equal(expected, actual) + + def test_thin(self): + data = create_test_data() + + expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6)) + actual = data.thin(time=5, dim2=6) + assert_equal(expected, actual) + + with raises_regex(ValueError, "cannot be zero"): + data.thin(time=0) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_sel_fancy(self): data = create_test_data() From 0a046dbdeff409728b4e9bf55fba9d2aae9acd07 Mon Sep 17 00:00:00 2001 From: ulijh Date: Sat, 7 Sep 2019 01:15:18 +0200 Subject: [PATCH 03/19] Make argmin/max work lazy with dask (#3244) * Make argmin/max work lazy with dask (#3237). * dask: Testing number of computes on reduce methods. * what's new updated * Fix typo Co-Authored-By: Stephan Hoyer * Be more explicit. 
Co-Authored-By: Stephan Hoyer * More explicit raise_if_dask_computes * nanargmin/max: only set fill_value when needed --- doc/whats-new.rst | 2 ++ xarray/core/nanops.py | 29 +++++---------------- xarray/core/nputils.py | 2 ++ xarray/tests/test_dask.py | 54 ++++++++++++++++++++++++++++++++++----- 4 files changed, 57 insertions(+), 30 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1e5855df51f..61a1fa59388 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -156,6 +156,8 @@ Bug fixes - Fix error that arises when using open_mfdataset on a series of netcdf files having differing values for a variable attribute of type list. (:issue:`3034`) By `Hasan Ahmad `_. +- Prevent :py:meth:`~xarray.DataArray.argmax` and :py:meth:`~xarray.DataArray.argmin` from calling + dask compute (:issue:`3237`). By `Ulrich Herter `_. .. _whats-new.0.12.3: diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 9ba4eae29ae..17240faf007 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -88,38 +88,21 @@ def nanmax(a, axis=None, out=None): def nanargmin(a, axis=None): - fill_value = dtypes.get_pos_infinity(a.dtype) if a.dtype.kind == "O": + fill_value = dtypes.get_pos_infinity(a.dtype) return _nan_argminmax_object("argmin", fill_value, a, axis=axis) - a, mask = _replace_nan(a, fill_value) - if isinstance(a, dask_array_type): - res = dask_array.argmin(a, axis=axis) - else: - res = np.argmin(a, axis=axis) - if mask is not None: - mask = mask.all(axis=axis) - if mask.any(): - raise ValueError("All-NaN slice encountered") - return res + module = dask_array if isinstance(a, dask_array_type) else nputils + return module.nanargmin(a, axis=axis) def nanargmax(a, axis=None): - fill_value = dtypes.get_neg_infinity(a.dtype) if a.dtype.kind == "O": + fill_value = dtypes.get_neg_infinity(a.dtype) return _nan_argminmax_object("argmax", fill_value, a, axis=axis) - a, mask = _replace_nan(a, fill_value) - if isinstance(a, dask_array_type): - res = dask_array.argmax(a, axis=axis) - else: - res = np.argmax(a, axis=axis) - - if mask is not None: - mask = mask.all(axis=axis) - if mask.any(): - raise ValueError("All-NaN slice encountered") - return res + module = dask_array if isinstance(a, dask_array_type) else nputils + return module.nanargmax(a, axis=axis) def nansum(a, axis=None, dtype=None, out=None, min_count=None): diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 769af03fe6a..df36c98f94c 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -237,3 +237,5 @@ def f(values, axis=None, **kwargs): nanprod = _create_bottleneck_method("nanprod") nancumsum = _create_bottleneck_method("nancumsum") nancumprod = _create_bottleneck_method("nancumprod") +nanargmin = _create_bottleneck_method("nanargmin") +nanargmax = _create_bottleneck_method("nanargmax") diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index e3fc6f65e0f..d105765481e 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -27,14 +27,49 @@ dd = pytest.importorskip("dask.dataframe") +class CountingScheduler: + """ Simple dask scheduler counting the number of computes. + + Reference: https://stackoverflow.com/questions/53289286/ """ + + def __init__(self, max_computes=0): + self.total_computes = 0 + self.max_computes = max_computes + + def __call__(self, dsk, keys, **kwargs): + self.total_computes += 1 + if self.total_computes > self.max_computes: + raise RuntimeError( + "Too many computes. Total: %d > max: %d." 
+ % (self.total_computes, self.max_computes) + ) + return dask.get(dsk, keys, **kwargs) + + +def _set_dask_scheduler(scheduler=dask.get): + """ Backwards compatible way of setting scheduler. """ + if LooseVersion(dask.__version__) >= LooseVersion("0.18.0"): + return dask.config.set(scheduler=scheduler) + return dask.set_options(get=scheduler) + + +def raise_if_dask_computes(max_computes=0): + scheduler = CountingScheduler(max_computes) + return _set_dask_scheduler(scheduler) + + +def test_raise_if_dask_computes(): + data = da.from_array(np.random.RandomState(0).randn(4, 6), chunks=(2, 2)) + with raises_regex(RuntimeError, "Too many computes"): + with raise_if_dask_computes(): + data.compute() + + class DaskTestCase: def assertLazyAnd(self, expected, actual, test): - - with ( - dask.config.set(scheduler="single-threaded") - if LooseVersion(dask.__version__) >= LooseVersion("0.18.0") - else dask.set_options(get=dask.get) - ): + with _set_dask_scheduler(dask.get): + # dask.get is the syncronous scheduler, which get's set also by + # dask.config.set(scheduler="syncronous") in current versions. test(actual, expected) if isinstance(actual, Dataset): @@ -174,7 +209,12 @@ def test_reduce(self): v = self.lazy_var self.assertLazyAndAllClose(u.mean(), v.mean()) self.assertLazyAndAllClose(u.std(), v.std()) - self.assertLazyAndAllClose(u.argmax(dim="x"), v.argmax(dim="x")) + with raise_if_dask_computes(): + actual = v.argmax(dim="x") + self.assertLazyAndAllClose(u.argmax(dim="x"), actual) + with raise_if_dask_computes(): + actual = v.argmin(dim="x") + self.assertLazyAndAllClose(u.argmin(dim="x"), actual) self.assertLazyAndAllClose((u > 1).any(), (v > 1).any()) self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) with raises_regex(NotImplementedError, "dask"): From d1260443d065c3f2ec3f8eb3d999c59a695b35a2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 8 Sep 2019 18:58:15 -0400 Subject: [PATCH 04/19] Remove some deprecations (#3292) * remove some deprecations * whatsnew --- doc/whats-new.rst | 7 +++++++ xarray/__init__.py | 2 +- xarray/core/alignment.py | 21 ++------------------- xarray/core/dataarray.py | 12 +++--------- xarray/core/variable.py | 10 ---------- xarray/tests/test_dataarray.py | 12 ++++-------- xarray/tests/test_dataset.py | 6 ++---- 7 files changed, 19 insertions(+), 51 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 61a1fa59388..e65f052ca8c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,6 +49,13 @@ Breaking changes crash in a later release. (:issue:`3250`) by `Guido Imperiale `_. 
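+
+  For example, a custom subclass (the name ``MyDataArray`` is illustrative)
+  would now be declared as::
+
+      class MyDataArray(xr.DataArray):
+          __slots__ = ()
+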
+- :py:meth:`~Dataset.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous + positional arguments were deprecated) +- Reindexing with variables of a different dimension now raise an error (previously deprecated) +- :py:func:`~xarray.broadcast_array` is removed (previously deprecated in favor of + :py:func:`~xarray.broadcast`) +- :py:meth:`~Variable.expand_dims` is removed (previously deprecated in favor of + :py:meth:`~Variable.set_dims`) New functions/methods ~~~~~~~~~~~~~~~~~~~~~ diff --git a/xarray/__init__.py b/xarray/__init__.py index a3df034f7c7..cdca708e28c 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -6,7 +6,7 @@ __version__ = get_versions()["version"] del get_versions -from .core.alignment import align, broadcast, broadcast_arrays +from .core.alignment import align, broadcast from .core.common import full_like, zeros_like, ones_like from .core.concat import concat from .core.combine import combine_by_coords, combine_nested, auto_combine diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 9aeef63e891..d63718500bc 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -1,6 +1,5 @@ import functools import operator -import warnings from collections import OrderedDict, defaultdict from contextlib import suppress from typing import TYPE_CHECKING, Any, Dict, Hashable, Mapping, Optional, Tuple, Union @@ -387,14 +386,9 @@ def reindex_variables( for dim, indexer in indexers.items(): if isinstance(indexer, DataArray) and indexer.dims != (dim,): - warnings.warn( + raise ValueError( "Indexer has dimensions {:s} that are different " - "from that to be indexed along {:s}. " - "This will behave differently in the future.".format( - str(indexer.dims), dim - ), - FutureWarning, - stacklevel=3, + "from that to be indexed along {:s}".format(str(indexer.dims), dim) ) target = new_indexes[dim] = utils.safe_cast_to_index(indexers[dim]) @@ -592,14 +586,3 @@ def broadcast(*args, exclude=None): result.append(_broadcast_helper(arg, exclude, dims_map, common_coords)) return tuple(result) - - -def broadcast_arrays(*args): - import warnings - - warnings.warn( - "xarray.broadcast_arrays is deprecated: use " "xarray.broadcast instead", - DeprecationWarning, - stacklevel=2, - ) - return broadcast(*args) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8660fa952b1..a3655e2c4b2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -471,7 +471,7 @@ def _to_dataset_whole( dataset = Dataset._from_vars_and_coord_names(variables, coord_names) return dataset - def to_dataset(self, dim: Hashable = None, name: Hashable = None) -> Dataset: + def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: """Convert a DataArray to a Dataset. Parameters @@ -489,15 +489,9 @@ def to_dataset(self, dim: Hashable = None, name: Hashable = None) -> Dataset: dataset : Dataset """ if dim is not None and dim not in self.dims: - warnings.warn( - "the order of the arguments on DataArray.to_dataset " - "has changed; you now need to supply ``name`` as " - "a keyword argument", - FutureWarning, - stacklevel=2, + raise TypeError( + "{} is not a dim. 
If supplying a ``name``, pass as a kwarg.".format(dim) ) - name = dim - dim = None if dim is not None: if name is not None: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ac4f7052f14..2e9906ce5ae 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1226,16 +1226,6 @@ def transpose(self, *dims) -> "Variable": def T(self) -> "Variable": return self.transpose() - def expand_dims(self, *args): - import warnings - - warnings.warn( - "Variable.expand_dims is deprecated: use " "Variable.set_dims instead", - DeprecationWarning, - stacklevel=2, - ) - return self.expand_dims(*args) - def set_dims(self, dims, shape=None): """Return a new variable with given set of dimensions. This method might be used to attach new dimension(s) to variable. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 27e6ab92f71..8c01ef9a68c 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1408,13 +1408,11 @@ def test_reindex_like_no_index(self): with raises_regex(ValueError, "different size for unlabeled"): foo.reindex_like(bar) - @pytest.mark.filterwarnings("ignore:Indexer has dimensions") def test_reindex_regressions(self): - # regression test for #279 - expected = DataArray(np.random.randn(5), coords=[("time", range(5))]) + da = DataArray(np.random.randn(5), coords=[("time", range(5))]) time2 = DataArray(np.arange(5), dims="time2") - actual = expected.reindex(time=time2) - assert_identical(actual, expected) + with pytest.raises(ValueError): + da.reindex(time=time2) # regression test for #736, reindex can not change complex nums dtype x = np.array([1, 2, 3], dtype=np.complex) @@ -3685,10 +3683,8 @@ def test_to_dataset_whole(self): expected = Dataset({"foo": ("x", [1, 2])}) assert_identical(expected, actual) - expected = Dataset({"bar": ("x", [1, 2])}) - with pytest.warns(FutureWarning): + with pytest.raises(TypeError): actual = named.to_dataset("bar") - assert_identical(expected, actual) def test_to_dataset_split(self): array = DataArray([1, 2, 3], coords=[("x", list("abc"))], attrs={"a": 1}) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d9f0284969e..814fc31d734 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1689,9 +1689,8 @@ def test_reindex(self): # regression test for #279 expected = Dataset({"x": ("time", np.random.randn(5))}, {"time": range(5)}) time2 = DataArray(np.arange(5), dims="time2") - with pytest.warns(FutureWarning): + with pytest.raises(ValueError): actual = expected.reindex(time=time2) - assert_identical(actual, expected) # another regression test ds = Dataset( @@ -1707,11 +1706,10 @@ def test_reindex(self): def test_reindex_warning(self): data = create_test_data() - with pytest.warns(FutureWarning) as ws: + with pytest.raises(ValueError): # DataArray with different dimension raises Future warning ind = xr.DataArray([0.0, 1.0], dims=["new_dim"], name="ind") data.reindex(dim2=ind) - assert any(["Indexer has dimensions " in str(w.message) for w in ws]) # Should not warn ind = xr.DataArray([0.0, 1.0], dims=["dim2"], name="ind") From 9e1c690e6da93314acf801eba649c98a97649c58 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Sep 2019 20:31:15 +0200 Subject: [PATCH 05/19] allow np-array levels and colors in 2D plots (#3295) * test if levels is None * allow np levels and color list * whats-new * Update doc/whats-new.rst Co-Authored-By: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/plot/utils.py | 2 +- xarray/tests/test_plot.py | 
28 ++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e65f052ca8c..4e975c55d47 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -165,6 +165,8 @@ Bug fixes By `Hasan Ahmad `_. - Prevent :py:meth:`~xarray.DataArray.argmax` and :py:meth:`~xarray.DataArray.argmin` from calling dask compute (:issue:`3237`). By `Ulrich Herter `_. +- Plots in 2 dimensions (pcolormesh, contour) now allow to specify levels as numpy + array (:issue:`3284`). By `Mathias Hauser `_. .. _whats-new.0.12.3: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 2d50734f519..53bbe8bacb9 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -731,7 +731,7 @@ def _process_cmap_cbar_kwargs( # colors is only valid when levels is supplied or the plot is of type # contour or contourf - if colors and (("contour" not in func.__name__) and (not levels)): + if colors and (("contour" not in func.__name__) and (levels is None)): raise ValueError("Can only specify colors with contour or levels") # we should not be getting a list of colors in cmap anymore diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index a1c05971ec4..c9b041b3ba7 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1283,26 +1283,38 @@ class TestContour(Common2dMixin, PlotTestCase): plotfunc = staticmethod(xplt.contour) + # matplotlib cmap.colors gives an rgbA ndarray + # when seaborn is used, instead we get an rgb tuple + @staticmethod + def _color_as_tuple(c): + return tuple(c[:3]) + def test_colors(self): - # matplotlib cmap.colors gives an rgbA ndarray - # when seaborn is used, instead we get an rgb tuple - def _color_as_tuple(c): - return tuple(c[:3]) # with single color, we don't want rgb array artist = self.plotmethod(colors="k") assert artist.cmap.colors[0] == "k" artist = self.plotmethod(colors=["k", "b"]) - assert _color_as_tuple(artist.cmap.colors[1]) == (0.0, 0.0, 1.0) + assert self._color_as_tuple(artist.cmap.colors[1]) == (0.0, 0.0, 1.0) artist = self.darray.plot.contour( levels=[-0.5, 0.0, 0.5, 1.0], colors=["k", "r", "w", "b"] ) - assert _color_as_tuple(artist.cmap.colors[1]) == (1.0, 0.0, 0.0) - assert _color_as_tuple(artist.cmap.colors[2]) == (1.0, 1.0, 1.0) + assert self._color_as_tuple(artist.cmap.colors[1]) == (1.0, 0.0, 0.0) + assert self._color_as_tuple(artist.cmap.colors[2]) == (1.0, 1.0, 1.0) + # the last color is now under "over" + assert self._color_as_tuple(artist.cmap._rgba_over) == (0.0, 0.0, 1.0) + + def test_colors_np_levels(self): + + # https://github.com/pydata/xarray/issues/3284 + levels = np.array([-0.5, 0.0, 0.5, 1.0]) + artist = self.darray.plot.contour(levels=levels, colors=["k", "r", "w", "b"]) + assert self._color_as_tuple(artist.cmap.colors[1]) == (1.0, 0.0, 0.0) + assert self._color_as_tuple(artist.cmap.colors[2]) == (1.0, 1.0, 1.0) # the last color is now under "over" - assert _color_as_tuple(artist.cmap._rgba_over) == (0.0, 0.0, 1.0) + assert self._color_as_tuple(artist.cmap._rgba_over) == (0.0, 0.0, 1.0) def test_cmap_and_color_both(self): with pytest.raises(ValueError): From e38ca0f168ebc2c52857a2abd45572a6e92beca8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 9 Sep 2019 18:34:13 +0000 Subject: [PATCH 06/19] Remove deprecated concat kwargs. 
(#3288) --- doc/whats-new.rst | 5 ++++- xarray/core/concat.py | 38 +------------------------------------ xarray/tests/test_concat.py | 5 ----- 3 files changed, 5 insertions(+), 43 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4e975c55d47..f5e0f9c467f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,10 @@ Breaking changes By `Maximilian Roos `_ - The ``inplace`` kwarg for public methods now raises an error, having been deprecated since v0.11.0. - By `Maximilian Roos `_ + By `Maximilian Roos `_ +- :py:func:`~xarray.concat` now requires the ``dim`` argument. Its ``indexers``, ``mode`` + and ``concat_over`` kwargs have now been removed. + By `Deepak Cherian `_ - Most xarray objects now define ``__slots__``. This reduces overall RAM usage by ~22% (not counting the underlying numpy buffers); on CPython 3.7/x64, a trivial DataArray has gone down from 1.9kB to 1.5kB. diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 9c7c622a31c..d5dfa49a8d5 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,4 +1,3 @@ -import warnings from collections import OrderedDict import pandas as pd @@ -11,14 +10,11 @@ def concat( objs, - dim=None, + dim, data_vars="all", coords="different", compat="equals", positions=None, - indexers=None, - mode=None, - concat_over=None, fill_value=dtypes.NA, join="outer", ): @@ -111,38 +107,6 @@ def concat( except StopIteration: raise ValueError("must supply at least one object to concatenate") - if dim is None: - warnings.warn( - "the `dim` argument to `concat` will be required " - "in a future version of xarray; for now, setting it to " - "the old default of 'concat_dim'", - FutureWarning, - stacklevel=2, - ) - dim = "concat_dims" - - if indexers is not None: # pragma: no cover - warnings.warn( - "indexers has been renamed to positions; the alias " - "will be removed in a future version of xarray", - FutureWarning, - stacklevel=2, - ) - positions = indexers - - if mode is not None: - raise ValueError( - "`mode` is no longer a valid argument to " - "xarray.concat; it has been split into the " - "`data_vars` and `coords` arguments" - ) - if concat_over is not None: - raise ValueError( - "`concat_over` is no longer a valid argument to " - "xarray.concat; it has been split into the " - "`data_vars` and `coords` arguments" - ) - if isinstance(first_obj, DataArray): f = _dataarray_concat elif isinstance(first_obj, Dataset): diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index b8ab89e926c..ee99ca027d9 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -163,11 +163,6 @@ def test_concat_errors(self): with raises_regex(ValueError, "coordinate in some datasets but not others"): concat([Dataset({"x": 0}), Dataset({}, {"x": 1})], dim="z") - with raises_regex(ValueError, "no longer a valid"): - concat([data, data], "new_dim", mode="different") - with raises_regex(ValueError, "no longer a valid"): - concat([data, data], "new_dim", concat_over="different") - def test_concat_join_kwarg(self): ds1 = Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]}) ds2 = Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [1], "y": [0.0001]}) From 69c7e01e5167a3137c285cb50d1978252bb8bcbf Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 9 Sep 2019 15:17:33 -0400 Subject: [PATCH 07/19] Compat and encoding deprecation to 0.14 (#3294) * push the removal of the compat and encoding arguments from Dataset/DataArray back to 0.14 * require 
dim argument to concat * Update whats-new.rst --- doc/whats-new.rst | 4 ++-- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f5e0f9c467f..d81986cb948 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -48,8 +48,8 @@ Breaking changes - Any user code that defines custom subclasses of xarray classes must now explicitly define ``__slots__`` itself. Subclasses that don't add any attributes must state so by defining ``__slots__ = ()`` right after the class header. - Omitting ``__slots__`` will now cause a ``FutureWarning`` to be logged, and a hard - crash in a later release. + Omitting ``__slots__`` will now cause a ``FutureWarning`` to be logged, and will raise an + error in a later release. (:issue:`3250`) by `Guido Imperiale `_. - :py:meth:`~Dataset.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a3655e2c4b2..807baddedf9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -315,7 +315,7 @@ def __init__( if encoding is not None: warnings.warn( "The `encoding` argument to `DataArray` is deprecated, and . " - "will be removed in 0.13. " + "will be removed in 0.14. " "Instead, specify the encoding when writing to disk or " "set the `encoding` attribute directly.", FutureWarning, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1476c1ba646..d6f0da42722 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -489,7 +489,7 @@ def __init__( if compat is not None: warnings.warn( "The `compat` argument to Dataset is deprecated and will be " - "removed in 0.13." + "removed in 0.14." "Instead, use `merge` to control how variables are combined", FutureWarning, stacklevel=2, From 732cf9afb434caeec34a29e91144da4783b6a670 Mon Sep 17 00:00:00 2001 From: Siyu Yang Date: Thu, 12 Sep 2019 19:07:10 -0700 Subject: [PATCH 08/19] Update why-xarray.rst with clearer expression (#3307) in one sentence. --- doc/why-xarray.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/why-xarray.rst b/doc/why-xarray.rst index d0a6c591b29..25d558d99d5 100644 --- a/doc/why-xarray.rst +++ b/doc/why-xarray.rst @@ -62,9 +62,8 @@ The power of the dataset over a plain dictionary is that, in addition to pulling out arrays by name, it is possible to select or combine data along a dimension across all arrays simultaneously. Like a :py:class:`~pandas.DataFrame`, datasets facilitate array operations with -heterogeneous data -- the difference is that the arrays in a dataset can not -only have different data types, but can also have different numbers of -dimensions. +heterogeneous data -- the difference is that the arrays in a dataset can have +not only different data types, but also different numbers of dimensions. This data model is borrowed from the netCDF_ file format, which also provides xarray with a natural and portable serialization format. NetCDF is very popular From e90e8bc06cf8e7c97c7dc4c0e8ff1bf87c49faf6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Sep 2019 15:39:40 +0000 Subject: [PATCH 09/19] ignore h5py 2.10.0 warnings and fix invalid_netcdf warning test. (#3301) * ignore h5py 2.10.0 warnings and fix invalid_netcdf warning test. * Better fix. * fix fix. * remove comment. * Add docs. * Revert "Add docs." This reverts commit 14ae0b1153f56144c7a90966512f0a156355cf25. 
--- xarray/tests/test_backends.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a5c42fd368c..f6254b32f4f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2163,6 +2163,7 @@ def test_encoding_unlimited_dims(self): @requires_h5netcdf @requires_netCDF4 +@pytest.mark.filterwarnings("ignore:use make_scale(name) instead") class TestH5NetCDFData(NetCDF4Base): engine = "h5netcdf" @@ -2173,16 +2174,25 @@ def create_store(self): @pytest.mark.filterwarnings("ignore:complex dtypes are supported by h5py") @pytest.mark.parametrize( - "invalid_netcdf, warns, num_warns", + "invalid_netcdf, warntype, num_warns", [(None, FutureWarning, 1), (False, FutureWarning, 1), (True, None, 0)], ) - def test_complex(self, invalid_netcdf, warns, num_warns): + def test_complex(self, invalid_netcdf, warntype, num_warns): expected = Dataset({"x": ("y", np.ones(5) + 1j * np.ones(5))}) save_kwargs = {"invalid_netcdf": invalid_netcdf} - with pytest.warns(warns) as record: + with pytest.warns(warntype) as record: with self.roundtrip(expected, save_kwargs=save_kwargs) as actual: assert_equal(expected, actual) - assert len(record) == num_warns + + recorded_num_warns = 0 + if warntype: + for warning in record: + if issubclass(warning.category, warntype) and ( + "complex dtypes" in str(warning.message) + ): + recorded_num_warns += 1 + + assert recorded_num_warns == num_warns def test_cross_engine_read_write_netcdf4(self): # Drop dim3, because its labels include strings. These appear to be @@ -2451,6 +2461,7 @@ def skip_if_not_engine(engine): @requires_dask +@pytest.mark.filterwarnings("ignore:use make_scale(name) instead") def test_open_mfdataset_manyfiles( readengine, nfiles, parallel, chunks, file_cache_maxsize ): From 7fb3b19d47e81afc5f7ff8506f1daeb3906b0fae Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Date: Sat, 14 Sep 2019 16:46:15 -0500 Subject: [PATCH 10/19] Accept int value in head, thin and tail (#3298) * Accept int value in head, thin and tail * Fix typing * Remove thin def val and add suggestions * Fix typing and change raise message --- xarray/core/dataarray.py | 28 +++++----- xarray/core/dataset.py | 99 +++++++++++++++++++++++++++------- xarray/tests/test_dataarray.py | 35 ++++++++++++ xarray/tests/test_dataset.py | 40 ++++++++++++++ 4 files changed, 171 insertions(+), 31 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 807baddedf9..7937a352cc6 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1035,10 +1035,12 @@ def sel( return self._from_temp_dataset(ds) def head( - self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any ) -> "DataArray": """Return a new DataArray whose data is given by the the first `n` - values along the specified dimension(s). + values along the specified dimension(s). 
Default `n` = 5 See Also -------- @@ -1046,16 +1048,16 @@ def head( DataArray.tail DataArray.thin """ - - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") - ds = self._to_temp_dataset().head(indexers=indexers) + ds = self._to_temp_dataset().head(indexers, **indexers_kwargs) return self._from_temp_dataset(ds) def tail( - self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any ) -> "DataArray": """Return a new DataArray whose data is given by the the last `n` - values along the specified dimension(s). + values along the specified dimension(s). Default `n` = 5 See Also -------- @@ -1063,15 +1065,16 @@ def tail( DataArray.head DataArray.thin """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") - ds = self._to_temp_dataset().tail(indexers=indexers) + ds = self._to_temp_dataset().tail(indexers, **indexers_kwargs) return self._from_temp_dataset(ds) def thin( - self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any ) -> "DataArray": """Return a new DataArray whose data is given by each `n` value - along the specified dimension(s). + along the specified dimension(s). Default `n` = 5 See Also -------- @@ -1079,8 +1082,7 @@ def thin( DataArray.head DataArray.tail """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") - ds = self._to_temp_dataset().thin(indexers=indexers) + ds = self._to_temp_dataset().thin(indexers, **indexers_kwargs) return self._from_temp_dataset(ds) def broadcast_like( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d6f0da42722..1eeb5350dfe 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2009,15 +2009,18 @@ def sel( return result._overwrite_indexes(new_indexes) def head( - self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any ) -> "Dataset": """Returns a new dataset with the first `n` values of each array for the specified dimension(s). Parameters ---------- - indexers : dict, optional - A dict with keys matching dimensions and integer values `n`. + indexers : dict or int, default: 5 + A dict with keys matching dimensions and integer values `n` + or a single integer `n` applied over all dimensions. One of indexers or indexers_kwargs must be provided. **indexers_kwargs : {dim: n, ...}, optional The keyword arguments form of ``indexers``. 
@@ -2030,20 +2033,41 @@ def head( Dataset.thin DataArray.head """ + if not indexers_kwargs: + if indexers is None: + indexers = 5 + if not isinstance(indexers, int) and not is_dict_like(indexers): + raise TypeError("indexers must be either dict-like or a single integer") + if isinstance(indexers, int): + indexers = {dim: indexers for dim in self.dims} indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") - indexers = {k: slice(val) for k, val in indexers.items()} - return self.isel(indexers) + for k, v in indexers.items(): + if not isinstance(v, int): + raise TypeError( + "expected integer type indexer for " + "dimension %r, found %r" % (k, type(v)) + ) + elif v < 0: + raise ValueError( + "expected positive integer as indexer " + "for dimension %r, found %s" % (k, v) + ) + indexers_slices = {k: slice(val) for k, val in indexers.items()} + return self.isel(indexers_slices) def tail( - self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any ) -> "Dataset": """Returns a new dataset with the last `n` values of each array for the specified dimension(s). Parameters ---------- - indexers : dict, optional - A dict with keys matching dimensions and integer values `n`. + indexers : dict or int, default: 5 + A dict with keys matching dimensions and integer values `n` + or a single integer `n` applied over all dimensions. One of indexers or indexers_kwargs must be provided. **indexers_kwargs : {dim: n, ...}, optional The keyword arguments form of ``indexers``. @@ -2056,24 +2080,44 @@ def tail( Dataset.thin DataArray.tail """ - + if not indexers_kwargs: + if indexers is None: + indexers = 5 + if not isinstance(indexers, int) and not is_dict_like(indexers): + raise TypeError("indexers must be either dict-like or a single integer") + if isinstance(indexers, int): + indexers = {dim: indexers for dim in self.dims} indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") - indexers = { + for k, v in indexers.items(): + if not isinstance(v, int): + raise TypeError( + "expected integer type indexer for " + "dimension %r, found %r" % (k, type(v)) + ) + elif v < 0: + raise ValueError( + "expected positive integer as indexer " + "for dimension %r, found %s" % (k, v) + ) + indexers_slices = { k: slice(-val, None) if val != 0 else slice(val) for k, val in indexers.items() } - return self.isel(indexers) + return self.isel(indexers_slices) def thin( - self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any ) -> "Dataset": """Returns a new dataset with each array indexed along every `n`th value for the specified dimension(s) Parameters ---------- - indexers : dict, optional - A dict with keys matching dimensions and integer values `n`. + indexers : dict or int, default: 5 + A dict with keys matching dimensions and integer values `n` + or a single integer `n` applied over all dimensions. One of indexers or indexers_kwargs must be provided. **indexers_kwargs : {dim: n, ...}, optional The keyword arguments form of ``indexers``. 
@@ -2086,11 +2130,30 @@ def thin( Dataset.tail DataArray.thin """ + if ( + not indexers_kwargs + and not isinstance(indexers, int) + and not is_dict_like(indexers) + ): + raise TypeError("indexers must be either dict-like or a single integer") + if isinstance(indexers, int): + indexers = {dim: indexers for dim in self.dims} indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") - if 0 in indexers.values(): - raise ValueError("step cannot be zero") - indexers = {k: slice(None, None, val) for k, val in indexers.items()} - return self.isel(indexers) + for k, v in indexers.items(): + if not isinstance(v, int): + raise TypeError( + "expected integer type indexer for " + "dimension %r, found %r" % (k, type(v)) + ) + elif v < 0: + raise ValueError( + "expected positive integer as indexer " + "for dimension %r, found %s" % (k, v) + ) + elif v == 0: + raise ValueError("step cannot be zero") + indexers_slices = {k: slice(None, None, val) for k, val in indexers.items()} + return self.isel(indexers_slices) def broadcast_like( self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8c01ef9a68c..78d9ace6be1 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1005,13 +1005,48 @@ def test_isel_drop(self): def test_head(self): assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5)) assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0)) + assert_equal( + self.dv.isel({dim: slice(6) for dim in self.dv.dims}), self.dv.head(6) + ) + assert_equal( + self.dv.isel({dim: slice(5) for dim in self.dv.dims}), self.dv.head() + ) + with raises_regex(TypeError, "either dict-like or a single int"): + self.dv.head([3]) + with raises_regex(TypeError, "expected integer type"): + self.dv.head(x=3.1) + with raises_regex(ValueError, "expected positive int"): + self.dv.head(-3) def test_tail(self): assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5)) assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0)) + assert_equal( + self.dv.isel({dim: slice(-6, None) for dim in self.dv.dims}), + self.dv.tail(6), + ) + assert_equal( + self.dv.isel({dim: slice(-5, None) for dim in self.dv.dims}), self.dv.tail() + ) + with raises_regex(TypeError, "either dict-like or a single int"): + self.dv.tail([3]) + with raises_regex(TypeError, "expected integer type"): + self.dv.tail(x=3.1) + with raises_regex(ValueError, "expected positive int"): + self.dv.tail(-3) def test_thin(self): assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5)) + assert_equal( + self.dv.isel({dim: slice(None, None, 6) for dim in self.dv.dims}), + self.dv.thin(6), + ) + with raises_regex(TypeError, "either dict-like or a single int"): + self.dv.thin([3]) + with raises_regex(TypeError, "expected integer type"): + self.dv.thin(x=3.1) + with raises_regex(ValueError, "expected positive int"): + self.dv.thin(-3) with raises_regex(ValueError, "cannot be zero"): self.dv.thin(time=0) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 814fc31d734..d8401e0bd42 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1422,6 +1422,21 @@ def test_head(self): actual = data.head(time=0) assert_equal(expected, actual) + expected = data.isel({dim: slice(6) for dim in data.dims}) + actual = data.head(6) + assert_equal(expected, actual) + + expected = data.isel({dim: slice(5) for dim in data.dims}) + actual = data.head() + assert_equal(expected, actual) + + 
with raises_regex(TypeError, "either dict-like or a single int"): + data.head([3]) + with raises_regex(TypeError, "expected integer type"): + data.head(dim2=3.1) + with raises_regex(ValueError, "expected positive int"): + data.head(time=-3) + def test_tail(self): data = create_test_data() @@ -1433,6 +1448,21 @@ def test_tail(self): actual = data.tail(dim1=0) assert_equal(expected, actual) + expected = data.isel({dim: slice(-6, None) for dim in data.dims}) + actual = data.tail(6) + assert_equal(expected, actual) + + expected = data.isel({dim: slice(-5, None) for dim in data.dims}) + actual = data.tail() + assert_equal(expected, actual) + + with raises_regex(TypeError, "either dict-like or a single int"): + data.tail([3]) + with raises_regex(TypeError, "expected integer type"): + data.tail(dim2=3.1) + with raises_regex(ValueError, "expected positive int"): + data.tail(time=-3) + def test_thin(self): data = create_test_data() @@ -1440,8 +1470,18 @@ def test_thin(self): actual = data.thin(time=5, dim2=6) assert_equal(expected, actual) + expected = data.isel({dim: slice(None, None, 6) for dim in data.dims}) + actual = data.thin(6) + assert_equal(expected, actual) + + with raises_regex(TypeError, "either dict-like or a single int"): + data.thin([3]) + with raises_regex(TypeError, "expected integer type"): + data.thin(dim2=3.1) with raises_regex(ValueError, "cannot be zero"): data.thin(time=0) + with raises_regex(ValueError, "expected positive int"): + data.thin(time=-3) @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_sel_fancy(self): From 1ce91051e3751a65dbbbc7c5ff3e1a2f00ea6ee5 Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Date: Sun, 15 Sep 2019 15:27:30 -0500 Subject: [PATCH 11/19] Fix DataArray api doc (#3309) --- doc/api.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index fb6e037a4f2..699687441d7 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -282,9 +282,9 @@ Indexing DataArray.loc DataArray.isel DataArray.sel - Dataset.head - Dataset.tail - Dataset.thin + DataArray.head + DataArray.tail + DataArray.thin DataArray.squeeze DataArray.interp DataArray.interp_like From b65ce8666020ba3a0300154655d2e5c05884d73b Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 16 Sep 2019 00:16:15 +0200 Subject: [PATCH 12/19] Honor `keep_attrs` in DataArray.quantile (#3305) * Added `keep_attrs` argument to Variable.quantile. TestDataArray.test_quantile now checks for attributes in output. * black * updated whats new. * removed vestigial comment. Switched default Variable.quantile keep_attrs to False. --- doc/whats-new.rst | 20 +++++++++++--------- xarray/core/dataset.py | 5 ++++- xarray/core/variable.py | 17 +++++++++++++---- xarray/tests/test_dataarray.py | 4 ++-- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d81986cb948..ab4b17ff16d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,8 +26,8 @@ Breaking changes - The ``isel_points`` and ``sel_points`` methods are removed, having been deprecated since v0.10.0. These are redundant with the ``isel`` / ``sel`` methods. - See :ref:`vectorized_indexing` for the details - By `Maximilian Roos `_ + See :ref:`vectorized_indexing` for the details + By `Maximilian Roos `_ - The ``inplace`` kwarg for public methods now raises an error, having been deprecated since v0.11.0. By `Maximilian Roos `_ @@ -52,12 +52,12 @@ Breaking changes error in a later release. (:issue:`3250`) by `Guido Imperiale `_. 
-- :py:meth:`~Dataset.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous +- :py:meth:`~Dataset.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous positional arguments were deprecated) - Reindexing with variables of a different dimension now raise an error (previously deprecated) -- :py:func:`~xarray.broadcast_array` is removed (previously deprecated in favor of +- :py:func:`~xarray.broadcast_array` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) -- :py:meth:`~Variable.expand_dims` is removed (previously deprecated in favor of +- :py:meth:`~Variable.expand_dims` is removed (previously deprecated in favor of :py:meth:`~Variable.set_dims`) New functions/methods @@ -90,7 +90,7 @@ New functions/methods and `Maximilian Roos `_. - Added :py:meth:`DataArray.broadcast_like` and :py:meth:`Dataset.broadcast_like`. - By `Deepak Cherian `_ and `David Mertz + By `Deepak Cherian `_ and `David Mertz `_. - Dataset plotting API for visualizing dependencies between two `DataArray`s! @@ -131,14 +131,14 @@ Enhancements :py:meth:`DataArray.set_index`, as well are more specific error messages when the user passes invalid arguments (:issue:`3176`). By `Gregory Gundersen `_. - + - :py:func:`filter_by_attrs` now filters the coordinates as well as the variables. By `Spencer Jones `_. Bug fixes ~~~~~~~~~ -- Improve "missing dimensions" error message for :py:func:`~xarray.apply_ufunc` - (:issue:`2078`). +- Improve "missing dimensions" error message for :py:func:`~xarray.apply_ufunc` + (:issue:`2078`). By `Rick Russotto `_. - :py:meth:`~xarray.DataArray.assign_coords` now supports dictionary arguments (:issue:`3231`). @@ -170,6 +170,8 @@ Bug fixes dask compute (:issue:`3237`). By `Ulrich Herter `_. - Plots in 2 dimensions (pcolormesh, contour) now allow to specify levels as numpy array (:issue:`3284`). By `Mathias Hauser `_. +- Fixed bug in :meth:`DataArray.quantile` failing to keep attributes when + `keep_attrs` was True (:issue:`3304`). By David Huard `_. .. _whats-new.0.12.3: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1eeb5350dfe..8a53e7ba757 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4831,7 +4831,10 @@ def quantile( # the former is often more efficient reduce_dims = None variables[name] = var.quantile( - q, dim=reduce_dims, interpolation=interpolation + q, + dim=reduce_dims, + interpolation=interpolation, + keep_attrs=keep_attrs, ) else: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2e9906ce5ae..b4b01f7ee49 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1592,7 +1592,7 @@ def no_conflicts(self, other): """ return self.broadcast_equals(other, equiv=duck_array_ops.array_notnull_equiv) - def quantile(self, q, dim=None, interpolation="linear"): + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1615,6 +1615,10 @@ def quantile(self, q, dim=None, interpolation="linear"): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. Returns ------- @@ -1623,7 +1627,7 @@ def quantile(self, q, dim=None, interpolation="linear"): is a scalar. 
If multiple percentiles are given, first axis of the result corresponds to the quantile and a quantile dimension is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. + dimensions that remain after the reduction of the array. See Also -------- @@ -1651,14 +1655,19 @@ def quantile(self, q, dim=None, interpolation="linear"): axis = None new_dims = [] - # only add the quantile dimension if q is array like + # Only add the quantile dimension if q is array-like if q.ndim != 0: new_dims = ["quantile"] + new_dims qs = np.nanpercentile( self.data, q * 100.0, axis=axis, interpolation=interpolation ) - return Variable(new_dims, qs) + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + attrs = self._attrs if keep_attrs else None + + return Variable(new_dims, qs, attrs) def rank(self, dim, pct=False): """Ranks the data. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 78d9ace6be1..49980c75b15 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2333,17 +2333,17 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) - # skip due to bug in older versions of numpy.nanpercentile def test_quantile(self): for q in [0.25, [0.50], [0.25, 0.75]]: for axis, dim in zip( [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]] ): - actual = self.dv.quantile(q, dim=dim) + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) expected = np.nanpercentile( self.dv.values, np.array(q) * 100, axis=axis ) np.testing.assert_allclose(actual.values, expected) + assert actual.attrs == self.attrs def test_reduce_keep_attrs(self): # Test dropped attrs From 756c94164840e8c070bcd26681b97c31412909ae Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 16 Sep 2019 14:49:27 +0000 Subject: [PATCH 13/19] Refactor concat to use merge for non-concatenated variables (#3239) * Add compat = 'override' and data_vars/coords='sensible' * concat tests. * Update docstring. * Begin merge, combine. * Merge non concatenated variables. * Fix tests. * Fix tests 2 * Fix test 3 * Cleanup: reduce number of times we loop over datasets. * unique_variable does minimum number of loads: fixes dask test * docstrings for compat='override' * concat compat docstring. * remove the sensible option. * reduce silly changes. * fix groupby order test. * cleanup: var names + remove one loop through datasets. * Add whats-new entry. * Add note in io.rst * fix warning. * Update netcdf multi-file dataset section in io.rst. * Update mfdataset in dask.rst. * simplify parse_datasets. * Avoid using merge_variables. unique_variable instead. * small stuff. * Update docs. * minor fix. * minor fix. * lint. * Better error message. * rename to shorter variable names. * Cleanup: fillna preserves attrs now. * Look for concat dim in data_vars also. * Update xarray/core/merge.py Co-Authored-By: Stephan Hoyer * avoid unnecessary computes. * minor cleanups. 
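* For illustration, the combination this enables (the same example is added to
  doc/io.rst in this diff):
  xr.open_mfdataset('my/files/*.nc', concat_dim="time",
                    data_vars='minimal', coords='minimal', compat='override')
  concatenates only variables that already contain the "time" dimension and
  takes the remaining variables, without comparison, from the first dataset.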
--- doc/dask.rst | 7 +- doc/io.rst | 244 +++++++++++++++++++++-------------- doc/whats-new.rst | 29 ++++- xarray/backends/api.py | 3 +- xarray/core/combine.py | 20 ++- xarray/core/concat.py | 190 +++++++++++++++++---------- xarray/core/dataarray.py | 4 +- xarray/core/merge.py | 62 +++++---- xarray/tests/test_combine.py | 13 +- xarray/tests/test_concat.py | 49 ++++++- xarray/tests/test_dask.py | 1 - xarray/tests/test_merge.py | 2 + 12 files changed, 402 insertions(+), 222 deletions(-) diff --git a/doc/dask.rst b/doc/dask.rst index adf0a6bf585..19cbc11292c 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -75,13 +75,14 @@ entirely equivalent to opening a dataset using ``open_dataset`` and then chunking the data using the ``chunk`` method, e.g., ``xr.open_dataset('example-data.nc').chunk({'time': 10})``. -To open multiple files simultaneously, use :py:func:`~xarray.open_mfdataset`:: +To open multiple files simultaneously in parallel using Dask delayed, +use :py:func:`~xarray.open_mfdataset`:: - xr.open_mfdataset('my/files/*.nc') + xr.open_mfdataset('my/files/*.nc', parallel=True) This function will automatically concatenate and merge dataset into one in the simple cases that it understands (see :py:func:`~xarray.auto_combine` -for the full disclaimer). By default, ``open_mfdataset`` will chunk each +for the full disclaimer). By default, :py:func:`~xarray.open_mfdataset` will chunk each netCDF file into a single Dask array; again, supply the ``chunks`` argument to control the size of the resulting Dask arrays. In more complex cases, you can open each file individually using ``open_dataset`` and merge the result, as diff --git a/doc/io.rst b/doc/io.rst index f7ac8c095b9..775d915188e 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -99,7 +99,9 @@ netCDF The recommended way to store xarray data structures is `netCDF`__, which is a binary file format for self-described datasets that originated in the geosciences. xarray is based on the netCDF data model, so netCDF files -on disk directly correspond to :py:class:`~xarray.Dataset` objects. +on disk directly correspond to :py:class:`~xarray.Dataset` objects (more accurately, +a group in a netCDF file directly corresponds to a to :py:class:`~xarray.Dataset` object. +See :ref:`io.netcdf_groups` for more.) NetCDF is supported on almost all platforms, and parsers exist for the vast majority of scientific programming languages. Recent versions of @@ -121,7 +123,7 @@ read/write netCDF V4 files and use the compression options described below). __ https://github.com/Unidata/netcdf4-python We can save a Dataset to disk using the -:py:attr:`Dataset.to_netcdf ` method: +:py:meth:`~Dataset.to_netcdf` method: .. ipython:: python @@ -147,19 +149,6 @@ convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back when loading, ensuring that the ``DataArray`` that is loaded is always exactly the same as the one that was saved. -NetCDF groups are not supported as part of the -:py:class:`~xarray.Dataset` data model. Instead, groups can be loaded -individually as Dataset objects. -To do so, pass a ``group`` keyword argument to the -``open_dataset`` function. The group can be specified as a path-like -string, e.g., to access subgroup 'bar' within group 'foo' pass -'/foo/bar' as the ``group`` argument. -In a similar way, the ``group`` keyword argument can be given to the -:py:meth:`~xarray.Dataset.to_netcdf` method to write to a group -in a netCDF file. 
-When writing multiple groups in one file, pass ``mode='a'`` to ``to_netcdf`` -to ensure that each call does not delete the file. - Data is always loaded lazily from netCDF files. You can manipulate, slice and subset Dataset and DataArray objects, and no array values are loaded into memory until you try to perform some sort of actual computation. For an example of how these @@ -195,6 +184,24 @@ It is possible to append or overwrite netCDF variables using the ``mode='a'`` argument. When using this option, all variables in the dataset will be written to the original netCDF file, regardless if they exist in the original dataset. + +.. _io.netcdf_groups: + +Groups +~~~~~~ + +NetCDF groups are not supported as part of the :py:class:`~xarray.Dataset` data model. +Instead, groups can be loaded individually as Dataset objects. +To do so, pass a ``group`` keyword argument to the +:py:func:`~xarray.open_dataset` function. The group can be specified as a path-like +string, e.g., to access subgroup ``'bar'`` within group ``'foo'`` pass +``'/foo/bar'`` as the ``group`` argument. +In a similar way, the ``group`` keyword argument can be given to the +:py:meth:`~xarray.Dataset.to_netcdf` method to write to a group +in a netCDF file. +When writing multiple groups in one file, pass ``mode='a'`` to +:py:meth:`~xarray.Dataset.to_netcdf` to ensure that each call does not delete the file. + .. _io.encoding: Reading encoded data @@ -203,7 +210,7 @@ Reading encoded data NetCDF files follow some conventions for encoding datetime arrays (as numbers with a "units" attribute) and for packing and unpacking data (as described by the "scale_factor" and "add_offset" attributes). If the argument -``decode_cf=True`` (default) is given to ``open_dataset``, xarray will attempt +``decode_cf=True`` (default) is given to :py:func:`~xarray.open_dataset`, xarray will attempt to automatically decode the values in the netCDF objects according to `CF conventions`_. Sometimes this will fail, for example, if a variable has an invalid "units" or "calendar" attribute. For these cases, you can @@ -247,6 +254,130 @@ will remove encoding information. import os os.remove('saved_on_disk.nc') + +.. _combining multiple files: + +Reading multi-file datasets +........................... + +NetCDF files are often encountered in collections, e.g., with different files +corresponding to different model runs or one file per timestamp. +xarray can straightforwardly combine such files into a single Dataset by making use of +:py:func:`~xarray.concat`, :py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and +:py:func:`~xarray.combine_by_coords`. For details on the difference between these +functions see :ref:`combining data`. + +Xarray includes support for manipulating datasets that don't fit into memory +with dask_. If you have dask installed, you can open multiple files +simultaneously in parallel using :py:func:`~xarray.open_mfdataset`:: + + xr.open_mfdataset('my/files/*.nc', parallel=True) + +This function automatically concatenates and merges multiple files into a +single xarray dataset. +It is the recommended way to open multiple files with xarray. +For more details on parallel reading, see :ref:`combining.multi`, :ref:`dask.io` and a +`blog post`_ by Stephan Hoyer. +:py:func:`~xarray.open_mfdataset` takes many kwargs that allow you to +control its behaviour (for e.g. ``parallel``, ``combine``, ``compat``, ``join``, ``concat_dim``). +See its docstring for more details. + + +.. 
note:: + + A common use-case involves a dataset distributed across a large number of files with + each file containing a large number of variables. Commonly a few of these variables + need to be concatenated along a dimension (say ``"time"``), while the rest are equal + across the datasets (ignoring floating point differences). The following command + with suitable modifications (such as ``parallel=True``) works well with such datasets:: + + xr.open_mfdataset('my/files/*.nc', concat_dim="time", + data_vars='minimal', coords='minimal', compat='override') + + This command concatenates variables along the ``"time"`` dimension, but only those that + already contain the ``"time"`` dimension (``data_vars='minimal', coords='minimal'``). + Variables that lack the ``"time"`` dimension are taken from the first dataset + (``compat='override'``). + + +.. _dask: http://dask.pydata.org +.. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ + +Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`~xarray.open_mfdataset`. +One can use the ``preprocess`` argument to provide a function that takes a dataset +and returns a modified Dataset. +:py:func:`~xarray.open_mfdataset` will call ``preprocess`` on every dataset +(corresponding to each file) prior to combining them. + + +If :py:func:`~xarray.open_mfdataset` does not meet your needs, other approaches are possible. +The general pattern for parallel reading of multiple files +using dask, modifying those datasets and then combining into a single ``Dataset`` is:: + + def modify(ds): + # modify ds here + return ds + + + # this is basically what open_mfdataset does + open_kwargs = dict(decode_cf=True, decode_times=False) + open_tasks = [dask.delayed(xr.open_dataset)(f, **open_kwargs) for f in file_names] + tasks = [dask.delayed(modify)(task) for task in open_tasks] + datasets = dask.compute(tasks) # get a list of xarray.Datasets + combined = xr.combine_nested(datasets) # or some combination of concat, merge + + +As an example, here's how we could approximate ``MFDataset`` from the netCDF4 +library:: + + from glob import glob + import xarray as xr + + def read_netcdfs(files, dim): + # glob expands paths with * to a list of files, like the unix shell + paths = sorted(glob(files)) + datasets = [xr.open_dataset(p) for p in paths] + combined = xr.concat(datasets, dim) + return combined + + combined = read_netcdfs('/all/my/files/*.nc', dim='time') + +This function will work in many cases, but it's not very robust. First, it +never closes files, which means it will fail once you need to load more than +a few thousand files. Second, it assumes that you want all the data from each +file and that it can all fit into memory. In many situations, you only need +a small subset or an aggregated summary of the data from each file.
+ +Here's a slightly more sophisticated example of how to remedy these +deficiencies:: + + def read_netcdfs(files, dim, transform_func=None): + def process_one_path(path): + # use a context manager, to ensure the file gets closed after use + with xr.open_dataset(path) as ds: + # transform_func should do some sort of selection or + # aggregation + if transform_func is not None: + ds = transform_func(ds) + # load all data from the transformed dataset, to ensure we can + # use it after closing each original file + ds.load() + return ds + + paths = sorted(glob(files)) + datasets = [process_one_path(p) for p in paths] + combined = xr.concat(datasets, dim) + return combined + + # here we suppose we only care about the combined mean of each file; + # you might also use indexing operations like .sel to subset datasets + combined = read_netcdfs('/all/my/files/*.nc', dim='time', + transform_func=lambda ds: ds.mean()) + +This pattern works well and is very robust. We've used similar code to process +tens of thousands of files constituting 100s of GB of data. + + .. _io.netcdf.writing_encoded: Writing encoded data @@ -817,84 +948,3 @@ For CSV files, one might also consider `xarray_extras`_. .. _xarray_extras: https://xarray-extras.readthedocs.io/en/latest/api/csv.html .. _IO tools: http://pandas.pydata.org/pandas-docs/stable/io.html - - -.. _combining multiple files: - - -Combining multiple files ------------------------- - -NetCDF files are often encountered in collections, e.g., with different files -corresponding to different model runs. xarray can straightforwardly combine such -files into a single Dataset by making use of :py:func:`~xarray.concat`, -:py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and -:py:func:`~xarray.combine_by_coords`. For details on the difference between these -functions see :ref:`combining data`. - -.. note:: - - Xarray includes support for manipulating datasets that don't fit into memory - with dask_. If you have dask installed, you can open multiple files - simultaneously using :py:func:`~xarray.open_mfdataset`:: - - xr.open_mfdataset('my/files/*.nc') - - This function automatically concatenates and merges multiple files into a - single xarray dataset. - It is the recommended way to open multiple files with xarray. - For more details, see :ref:`combining.multi`, :ref:`dask.io` and a - `blog post`_ by Stephan Hoyer. - -.. _dask: http://dask.pydata.org -.. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ - -For example, here's how we could approximate ``MFDataset`` from the netCDF4 -library:: - - from glob import glob - import xarray as xr - - def read_netcdfs(files, dim): - # glob expands paths with * to a list of files, like the unix shell - paths = sorted(glob(files)) - datasets = [xr.open_dataset(p) for p in paths] - combined = xr.concat(dataset, dim) - return combined - - combined = read_netcdfs('/all/my/files/*.nc', dim='time') - -This function will work in many cases, but it's not very robust. First, it -never closes files, which means it will fail one you need to load more than -a few thousands file. Second, it assumes that you want all the data from each -file and that it can all fit into memory. In many situations, you only need -a small subset or an aggregated summary of the data from each file. 
- -Here's a slightly more sophisticated example of how to remedy these -deficiencies:: - - def read_netcdfs(files, dim, transform_func=None): - def process_one_path(path): - # use a context manager, to ensure the file gets closed after use - with xr.open_dataset(path) as ds: - # transform_func should do some sort of selection or - # aggregation - if transform_func is not None: - ds = transform_func(ds) - # load all data from the transformed dataset, to ensure we can - # use it after closing each original file - ds.load() - return ds - - paths = sorted(glob(files)) - datasets = [process_one_path(p) for p in paths] - combined = xr.concat(datasets, dim) - return combined - - # here we suppose we only care about the combined mean of each file; - # you might also use indexing operations like .sel to subset datasets - combined = read_netcdfs('/all/my/files/*.nc', dim='time', - transform_func=lambda ds: ds.mean()) - -This pattern works well and is very robust. We've used similar code to process -tens of thousands of files constituting 100s of GB of data. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ab4b17ff16d..492c9279e6b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -93,7 +93,7 @@ New functions/methods By `Deepak Cherian `_ and `David Mertz `_. -- Dataset plotting API for visualizing dependencies between two `DataArray`s! +- Dataset plotting API for visualizing dependencies between two DataArrays! Currently only :py:meth:`Dataset.plot.scatter` is implemented. By `Yohai Bar Sinai `_ and `Deepak Cherian `_ @@ -103,11 +103,30 @@ New functions/methods Enhancements ~~~~~~~~~~~~ -- Added ``join='override'``. This only checks that index sizes are equal among objects and skips - checking indexes for equality. By `Deepak Cherian `_. +- Multiple enhancements to :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset`. -- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. - It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian `_. + - Added ``compat='override'``. When merging, this option picks the variable from the first dataset + and skips all comparisons. + + - Added ``join='override'``. When aligning, this only checks that index sizes are equal among objects + and skips checking indexes for equality. + + - :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. + It is passed down to :py:func:`~xarray.align`. + + - :py:func:`~xarray.concat` now calls :py:func:`~xarray.merge` on variables that are not concatenated + (i.e. variables without ``concat_dim`` when ``data_vars`` or ``coords`` are ``"minimal"``). + :py:func:`~xarray.concat` passes its new ``compat`` kwarg down to :py:func:`~xarray.merge`. + (:issue:`2064`) + + Users can avoid a common bottleneck when using :py:func:`~xarray.open_mfdataset` on a large number of + files with variables that are known to be aligned and some of which need not be concatenated. + Slow equality comparisons can now be avoided, for e.g.:: + + data = xr.open_mfdataset(files, concat_dim='time', data_vars='minimal', + coords='minimal', compat='override', join='override') + + By `Deepak Cherian `_: - In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. 
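To make the ``join='override'`` entry above concrete, a small sketch (the coordinate values are invented for illustration)::

    import xarray as xr

    ds1 = xr.Dataset(
        {"a": (("t", "x"), [[1.0, 2.0]])},
        coords={"t": [0], "x": [10.0, 20.0]},
    )
    ds2 = xr.Dataset(
        {"a": (("t", "x"), [[3.0, 4.0]])},
        coords={"t": [1], "x": [10.0 + 1e-12, 20.0]},
    )

    # The "x" indexes differ only by floating point noise. join="override"
    # checks that their sizes match and then reuses the index from the first
    # dataset, skipping the equality comparison (and the NaN padding that the
    # default outer join would produce here).
    combined = xr.concat([ds1, ds2], dim="t", join="override")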
diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a20d3c2a306..1f0869cfc53 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -761,7 +761,7 @@ def open_mfdataset( `xarray.auto_combine` is used, but in the future this behavior will switch to use `xarray.combine_by_coords` by default. compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts when merging: * 'broadcast_equals': all values must be equal when variables are @@ -772,6 +772,7 @@ def open_mfdataset( * 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + * 'override': skip comparing and pick variable from first dataset preprocess : callable, optional If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in diff --git a/xarray/core/combine.py b/xarray/core/combine.py index c24be88b19e..e35bb51e030 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -243,6 +243,7 @@ def _combine_1d( dim=concat_dim, data_vars=data_vars, coords=coords, + compat=compat, fill_value=fill_value, join=join, ) @@ -351,7 +352,7 @@ def combine_nested( Must be the same length as the depth of the list passed to ``datasets``. compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential merge conflicts: @@ -363,6 +364,7 @@ def combine_nested( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat coords : {'minimal', 'different', 'all' or list of str}, optional @@ -504,7 +506,7 @@ def combine_by_coords( datasets : sequence of xarray.Dataset Dataset objects to combine. compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -516,6 +518,7 @@ def combine_by_coords( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat coords : {'minimal', 'different', 'all' or list of str}, optional @@ -598,6 +601,7 @@ def combine_by_coords( concat_dims=concat_dims, data_vars=data_vars, coords=coords, + compat=compat, fill_value=fill_value, join=join, ) @@ -667,7 +671,7 @@ def auto_combine( component files. Set ``concat_dim=None`` explicitly to disable concatenation. compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: - 'broadcast_equals': all values must be equal when variables are @@ -678,6 +682,7 @@ def auto_combine( - 'no_conflicts': only values which are not null in both datasets must be equal. 
The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat coords : {'minimal', 'different', 'all' o list of str}, optional @@ -832,6 +837,7 @@ def _old_auto_combine( dim=dim, data_vars=data_vars, coords=coords, + compat=compat, fill_value=fill_value, join=join, ) @@ -850,6 +856,7 @@ def _auto_concat( coords="different", fill_value=dtypes.NA, join="outer", + compat="no_conflicts", ): if len(datasets) == 1 and dim is None: # There is nothing more to combine, so kick out early. @@ -876,5 +883,10 @@ def _auto_concat( ) dim, = concat_dims return concat( - datasets, dim=dim, data_vars=data_vars, coords=coords, fill_value=fill_value + datasets, + dim=dim, + data_vars=data_vars, + coords=coords, + fill_value=fill_value, + compat=compat, ) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index d5dfa49a8d5..e68c247d880 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -4,6 +4,7 @@ from . import dtypes, utils from .alignment import align +from .merge import unique_variable, _VALID_COMPAT from .variable import IndexVariable, Variable, as_variable from .variable import concat as concat_vars @@ -59,12 +60,19 @@ def concat( those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, in addition to the 'minimal' coordinates. - compat : {'equals', 'identical'}, optional - String indicating how to compare non-concatenated variables and - dataset global attributes for potential conflicts. 'equals' means - that all variable values and dimensions must be the same; - 'identical' means that variable attributes and global attributes - must also be equal. + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional + String indicating how to compare non-concatenated variables of the same name for + potential conflicts. This is passed down to merge. + + - 'broadcast_equals': all values must be equal when variables are + broadcast against each other to ensure common dimensions. + - 'equals': all values and dimensions must be the same. + - 'identical': all values, dimensions and attributes must be the + same. + - 'no_conflicts': only values which are not null in both datasets + must be equal. The returned dataset then contains the combination + of all non-null values. + - 'override': skip comparing and pick variable from first dataset positions : None or list of integer arrays, optional List of integer arrays which specifies the integer positions to which to assign each dataset along the concatenated dimension. If not @@ -107,6 +115,12 @@ def concat( except StopIteration: raise ValueError("must supply at least one object to concatenate") + if compat not in _VALID_COMPAT: + raise ValueError( + "compat=%r invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" + % compat + ) + if isinstance(first_obj, DataArray): f = _dataarray_concat elif isinstance(first_obj, Dataset): @@ -143,23 +157,39 @@ def _calc_concat_dim_coord(dim): return dim, coord -def _calc_concat_over(datasets, dim, data_vars, coords): +def _calc_concat_over(datasets, dim, dim_names, data_vars, coords, compat): """ Determine which dataset variables need to be concatenated in the result, - and which can simply be taken from the first dataset. 
""" # Return values concat_over = set() equals = {} - if dim in datasets[0]: + if dim in dim_names: + concat_over_existing_dim = True concat_over.add(dim) + else: + concat_over_existing_dim = False + + concat_dim_lengths = [] for ds in datasets: + if concat_over_existing_dim: + if dim not in ds.dims: + if dim in ds: + ds = ds.set_coords(dim) + else: + raise ValueError("%r is not present in all datasets" % dim) concat_over.update(k for k, v in ds.variables.items() if dim in v.dims) + concat_dim_lengths.append(ds.dims.get(dim, 1)) def process_subset_opt(opt, subset): if isinstance(opt, str): if opt == "different": + if compat == "override": + raise ValueError( + "Cannot specify both %s='different' and compat='override'." + % subset + ) # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: @@ -173,7 +203,7 @@ def process_subset_opt(opt, subset): for ds_rhs in datasets[1:]: v_rhs = ds_rhs.variables[k].compute() computed.append(v_rhs) - if not v_lhs.equals(v_rhs): + if not getattr(v_lhs, compat)(v_rhs): concat_over.add(k) equals[k] = False # computed variables are not to be re-computed @@ -209,7 +239,29 @@ def process_subset_opt(opt, subset): process_subset_opt(data_vars, "data_vars") process_subset_opt(coords, "coords") - return concat_over, equals + return concat_over, equals, concat_dim_lengths + + +# determine dimensional coordinate names and a dict mapping name to DataArray +def _parse_datasets(datasets): + + dims = set() + all_coord_names = set() + data_vars = set() # list of data_vars + dim_coords = dict() # maps dim name to variable + dims_sizes = {} # shared dimension sizes to expand variables + + for ds in datasets: + dims_sizes.update(ds.dims) + all_coord_names.update(ds.coords) + data_vars.update(ds.data_vars) + + for dim in set(ds.dims) - dims: + if dim not in dim_coords: + dim_coords[dim] = ds.coords[dim].variable + dims = dims | set(ds.dims) + + return dim_coords, dims_sizes, all_coord_names, data_vars def _dataset_concat( @@ -227,11 +279,6 @@ def _dataset_concat( """ from .dataset import Dataset - if compat not in ["equals", "identical"]: - raise ValueError( - "compat=%r invalid: must be 'equals' " "or 'identical'" % compat - ) - dim, coord = _calc_concat_dim_coord(dim) # Make sure we're working on a copy (we'll be loading variables) datasets = [ds.copy() for ds in datasets] @@ -239,62 +286,65 @@ def _dataset_concat( *datasets, join=join, copy=False, exclude=[dim], fill_value=fill_value ) - concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords) + dim_coords, dims_sizes, coord_names, data_names = _parse_datasets(datasets) + dim_names = set(dim_coords) + unlabeled_dims = dim_names - coord_names + + both_data_and_coords = coord_names & data_names + if both_data_and_coords: + raise ValueError( + "%r is a coordinate in some datasets but not others." 
% both_data_and_coords + ) + # we don't want the concat dimension in the result dataset yet + dim_coords.pop(dim, None) + dims_sizes.pop(dim, None) + + # case where concat dimension is a coordinate or data_var but not a dimension + if (dim in coord_names or dim in data_names) and dim not in dim_names: + datasets = [ds.expand_dims(dim) for ds in datasets] + + # determine which variables to concatentate + concat_over, equals, concat_dim_lengths = _calc_concat_over( + datasets, dim, dim_names, data_vars, coords, compat + ) + + # determine which variables to merge, and then merge them according to compat + variables_to_merge = (coord_names | data_names) - concat_over - dim_names + + result_vars = {} + if variables_to_merge: + to_merge = {var: [] for var in variables_to_merge} + + for ds in datasets: + absent_merge_vars = variables_to_merge - set(ds.variables) + if absent_merge_vars: + raise ValueError( + "variables %r are present in some datasets but not others. " + % absent_merge_vars + ) - def insert_result_variable(k, v): - assert isinstance(v, Variable) - if k in datasets[0].coords: - result_coord_names.add(k) - result_vars[k] = v + for var in variables_to_merge: + to_merge[var].append(ds.variables[var]) - # create the new dataset and add constant variables - result_vars = OrderedDict() - result_coord_names = set(datasets[0].coords) + for var in variables_to_merge: + result_vars[var] = unique_variable( + var, to_merge[var], compat=compat, equals=equals.get(var, None) + ) + else: + result_vars = OrderedDict() + result_vars.update(dim_coords) + + # assign attrs and encoding from first dataset result_attrs = datasets[0].attrs result_encoding = datasets[0].encoding - for k, v in datasets[0].variables.items(): - if k not in concat_over: - insert_result_variable(k, v) - - # check that global attributes and non-concatenated variables are fixed - # across all datasets + # check that global attributes are fixed across all datasets if necessary for ds in datasets[1:]: if compat == "identical" and not utils.dict_equiv(ds.attrs, result_attrs): - raise ValueError("dataset global attributes not equal") - for k, v in ds.variables.items(): - if k not in result_vars and k not in concat_over: - raise ValueError("encountered unexpected variable %r" % k) - elif (k in result_coord_names) != (k in ds.coords): - raise ValueError( - "%r is a coordinate in some datasets but not " "others" % k - ) - elif k in result_vars and k != dim: - # Don't use Variable.identical as it internally invokes - # Variable.equals, and we may already know the answer - if compat == "identical" and not utils.dict_equiv( - v.attrs, result_vars[k].attrs - ): - raise ValueError("variable %s not identical across datasets" % k) - - # Proceed with equals() - try: - # May be populated when using the "different" method - is_equal = equals[k] - except KeyError: - result_vars[k].load() - is_equal = v.equals(result_vars[k]) - if not is_equal: - raise ValueError("variable %s not equal across datasets" % k) + raise ValueError("Dataset global attributes not equal.") # we've already verified everything is consistent; now, calculate # shared dimension sizes so we can expand the necessary variables - dim_lengths = [ds.dims.get(dim, 1) for ds in datasets] - non_concat_dims = {} - for ds in datasets: - non_concat_dims.update(ds.dims) - non_concat_dims.pop(dim, None) - def ensure_common_dims(vars): # ensure each variable with the given name shares the same # dimensions and the same shape for all of them except along the @@ -302,25 +352,27 @@ def 
ensure_common_dims(vars): common_dims = tuple(pd.unique([d for v in vars for d in v.dims])) if dim not in common_dims: common_dims = (dim,) + common_dims - for var, dim_len in zip(vars, dim_lengths): + for var, dim_len in zip(vars, concat_dim_lengths): if var.dims != common_dims: - common_shape = tuple( - non_concat_dims.get(d, dim_len) for d in common_dims - ) + common_shape = tuple(dims_sizes.get(d, dim_len) for d in common_dims) var = var.set_dims(common_dims, common_shape) yield var # stack up each variable to fill-out the dataset (in order) + # n.b. this loop preserves variable order, needed for groupby. for k in datasets[0].variables: if k in concat_over: vars = ensure_common_dims([ds.variables[k] for ds in datasets]) combined = concat_vars(vars, dim, positions) - insert_result_variable(k, combined) + assert isinstance(combined, Variable) + result_vars[k] = combined result = Dataset(result_vars, attrs=result_attrs) - result = result.set_coords(result_coord_names) + result = result.set_coords(coord_names) result.encoding = result_encoding + result = result.drop(unlabeled_dims, errors="ignore") + if coord is not None: # add concat dimension last to ensure that its in the final Dataset result[coord.name] = coord @@ -342,7 +394,7 @@ def _dataarray_concat( if data_vars != "all": raise ValueError( - "data_vars is not a valid argument when " "concatenating DataArray objects" + "data_vars is not a valid argument when concatenating DataArray objects" ) datasets = [] diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7937a352cc6..d9e98839419 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1551,8 +1551,8 @@ def set_index( obj : DataArray Another DataArray, with this data but replaced coordinates. - Example - ------- + Examples + -------- >>> arr = xr.DataArray(data=np.ones((2, 3)), ... dims=['x', 'y'], ... coords={'x': diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 225507b9204..6dba659f992 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -44,6 +44,7 @@ "broadcast_equals": 2, "minimal": 3, "no_conflicts": 4, + "override": 5, } ) @@ -70,8 +71,8 @@ class MergeError(ValueError): # TODO: move this to an xarray.exceptions module? -def unique_variable(name, variables, compat="broadcast_equals"): - # type: (Any, List[Variable], str) -> Variable +def unique_variable(name, variables, compat="broadcast_equals", equals=None): + # type: (Any, List[Variable], str, bool) -> Variable """Return the unique variable from a list of variables or raise MergeError. Parameters @@ -81,8 +82,10 @@ def unique_variable(name, variables, compat="broadcast_equals"): variables : list of xarray.Variable List of Variable objects, all of which go by the same name in different inputs. - compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional Type of equality check to use. + equals: None or bool, + corresponding to result of compat test Returns ------- @@ -93,30 +96,38 @@ def unique_variable(name, variables, compat="broadcast_equals"): MergeError: if any of the variables are not equal. 
""" # noqa out = variables[0] - if len(variables) > 1: - combine_method = None - if compat == "minimal": - compat = "broadcast_equals" + if len(variables) == 1 or compat == "override": + return out + + combine_method = None + + if compat == "minimal": + compat = "broadcast_equals" + + if compat == "broadcast_equals": + dim_lengths = broadcast_dimension_size(variables) + out = out.set_dims(dim_lengths) + + if compat == "no_conflicts": + combine_method = "fillna" - if compat == "broadcast_equals": - dim_lengths = broadcast_dimension_size(variables) - out = out.set_dims(dim_lengths) + if equals is None: + out = out.compute() + for var in variables[1:]: + equals = getattr(out, compat)(var) + if not equals: + break - if compat == "no_conflicts": - combine_method = "fillna" + if not equals: + raise MergeError( + "conflicting values for variable %r on objects to be combined. You can skip this check by specifying compat='override'." + % (name) + ) + if combine_method: for var in variables[1:]: - if not getattr(out, compat)(var): - raise MergeError( - "conflicting values for variable %r on " - "objects to be combined:\n" - "first value: %r\nsecond value: %r" % (name, out, var) - ) - if combine_method: - # TODO: add preservation of attrs into fillna - out = getattr(out, combine_method)(var) - out.attrs = var.attrs + out = getattr(out, combine_method)(var) return out @@ -152,7 +163,7 @@ def merge_variables( priority_vars : mapping with Variable or None values, optional If provided, variables are always taken from this dict in preference to the input variable dictionaries, without checking for conflicts. - compat : {'identical', 'equals', 'broadcast_equals', 'minimal', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'minimal', 'no_conflicts', 'override'}, optional Type of equality check to use when checking for conflicts. Returns @@ -449,7 +460,7 @@ def merge_core( ---------- objs : list of mappings All values must be convertable to labeled arrays. - compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional Compatibility checks to use when merging variables. join : {'outer', 'inner', 'left', 'right'}, optional How to combine objects with different indexes. @@ -519,7 +530,7 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): objects : Iterable[Union[xarray.Dataset, xarray.DataArray, dict]] Merge together all variables from these objects. If any of them are DataArray objects, they must have a name. - compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -531,6 +542,7 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset join : {'outer', 'inner', 'left', 'right', 'exact'}, optional String indicating how to combine differing indexes in objects. 
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index f786a851e62..1abca30d199 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -327,13 +327,13 @@ class TestCheckShapeTileIDs: def test_check_depths(self): ds = create_test_data(0) combined_tile_ids = {(0,): ds, (0, 1): ds} - with raises_regex(ValueError, "sub-lists do not have " "consistent depths"): + with raises_regex(ValueError, "sub-lists do not have consistent depths"): _check_shape_tile_ids(combined_tile_ids) def test_check_lengths(self): ds = create_test_data(0) combined_tile_ids = {(0, 0): ds, (0, 1): ds, (0, 2): ds, (1, 0): ds, (1, 1): ds} - with raises_regex(ValueError, "sub-lists do not have " "consistent lengths"): + with raises_regex(ValueError, "sub-lists do not have consistent lengths"): _check_shape_tile_ids(combined_tile_ids) @@ -565,11 +565,6 @@ def test_combine_concat_over_redundant_nesting(self): expected = Dataset({"x": [0]}) assert_identical(expected, actual) - def test_combine_nested_but_need_auto_combine(self): - objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2], "wall": [0]})] - with raises_regex(ValueError, "cannot be combined"): - combine_nested(objs, concat_dim="x") - @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0]) def test_combine_nested_fill_value(self, fill_value): datasets = [ @@ -618,7 +613,7 @@ def test_combine_by_coords(self): assert_equal(actual, expected) objs = [Dataset({"x": 0}), Dataset({"x": 1})] - with raises_regex(ValueError, "Could not find any dimension " "coordinates"): + with raises_regex(ValueError, "Could not find any dimension coordinates"): combine_by_coords(objs) objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] @@ -761,7 +756,7 @@ def test_auto_combine(self): auto_combine(objs) objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] - with pytest.raises(KeyError): + with raises_regex(ValueError, "'y' is not present in all datasets"): auto_combine(objs) def test_auto_combine_previously_failed(self): diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index ee99ca027d9..00428f70966 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -5,8 +5,7 @@ import pytest from xarray import DataArray, Dataset, Variable, concat -from xarray.core import dtypes - +from xarray.core import dtypes, merge from . 
import ( InaccessibleArray, assert_array_equal, @@ -18,6 +17,34 @@ from .test_dataset import create_test_data +def test_concat_compat(): + ds1 = Dataset( + { + "has_x_y": (("y", "x"), [[1, 2]]), + "has_x": ("x", [1, 2]), + "no_x_y": ("z", [1, 2]), + }, + coords={"x": [0, 1], "y": [0], "z": [-1, -2]}, + ) + ds2 = Dataset( + { + "has_x_y": (("y", "x"), [[3, 4]]), + "has_x": ("x", [1, 2]), + "no_x_y": (("q", "z"), [[1, 2]]), + }, + coords={"x": [0, 1], "y": [1], "z": [-1, -2], "q": [0]}, + ) + + result = concat([ds1, ds2], dim="y", data_vars="minimal", compat="broadcast_equals") + assert_equal(ds2.no_x_y, result.no_x_y.transpose()) + + for var in ["has_x", "no_x_y"]: + assert "y" not in result[var] + + with raises_regex(ValueError, "'q' is not present in all datasets"): + concat([ds1, ds2], dim="q", data_vars="all", compat="broadcast_equals") + + class TestConcatDataset: @pytest.fixture def data(self): @@ -92,7 +119,7 @@ def test_concat_coords(self): actual = concat(objs, dim="x", coords=coords) assert_identical(expected, actual) for coords in ["minimal", []]: - with raises_regex(ValueError, "not equal across"): + with raises_regex(merge.MergeError, "conflicting values"): concat(objs, dim="x", coords=coords) def test_concat_constant_index(self): @@ -103,8 +130,10 @@ def test_concat_constant_index(self): for mode in ["different", "all", ["foo"]]: actual = concat([ds1, ds2], "y", data_vars=mode) assert_identical(expected, actual) - with raises_regex(ValueError, "not equal across datasets"): - concat([ds1, ds2], "y", data_vars="minimal") + with raises_regex(merge.MergeError, "conflicting values"): + # previously dim="y", and raised error which makes no sense. + # "foo" has dimension "y" so minimal should concatenate it? + concat([ds1, ds2], "new_dim", data_vars="minimal") def test_concat_size0(self): data = create_test_data() @@ -134,6 +163,14 @@ def test_concat_errors(self): data = create_test_data() split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))] + with raises_regex(ValueError, "must supply at least one"): + concat([], "dim1") + + with raises_regex(ValueError, "Cannot specify both .*='different'"): + concat( + [data, data], dim="concat_dim", data_vars="different", compat="override" + ) + with raises_regex(ValueError, "must supply at least one"): concat([], "dim1") @@ -146,7 +183,7 @@ def test_concat_errors(self): concat([data0, data1], "dim1", compat="identical") assert_identical(data, concat([data0, data1], "dim1", compat="equals")) - with raises_regex(ValueError, "encountered unexpected"): + with raises_regex(ValueError, "present in some datasets"): data0, data1 = deepcopy(split_data) data1["foo"] = ("bar", np.random.randn(10)) concat([data0, data1], "dim1") diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index d105765481e..76b3ed1a8d6 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -825,7 +825,6 @@ def kernel(name): """Dask kernel to test pickling/unpickling and __repr__. Must be global to make it pickleable. 
""" - print("kernel(%s)" % name) global kernel_call_count kernel_call_count += 1 return np.ones(1, dtype=np.int64) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index ed1453ce95d..c1e6c7a5ce8 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -196,6 +196,8 @@ def test_merge_compat(self): with raises_regex(ValueError, "compat=.* invalid"): ds1.merge(ds2, compat="foobar") + assert ds1.identical(ds1.merge(ds2, compat="override")) + def test_merge_auto_align(self): ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]}) From d087fc58c40be0490151cb011802a609a774aaba Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Sep 2019 14:49:31 +0000 Subject: [PATCH 14/19] Raise error if cmap is list of colors (#3310) * Raise error if cmap is list of colors * whats-new.rst --- doc/whats-new.rst | 2 ++ xarray/plot/utils.py | 6 ++---- xarray/tests/test_plot.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 492c9279e6b..567e74052d5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,8 @@ Breaking changes - :py:func:`~xarray.concat` now requires the ``dim`` argument. Its ``indexers``, ``mode`` and ``concat_over`` kwargs have now been removed. By `Deepak Cherian `_ +- Passing a list of colors in ``cmap`` will now raise an error, having been deprecated since + v0.6.1. - Most xarray objects now define ``__slots__``. This reduces overall RAM usage by ~22% (not counting the underlying numpy buffers); on CPython 3.7/x64, a trivial DataArray has gone down from 1.9kB to 1.5kB. diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 53bbe8bacb9..f69a8af7a2f 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -737,11 +737,9 @@ def _process_cmap_cbar_kwargs( # we should not be getting a list of colors in cmap anymore # is there a better way to do this test? if isinstance(cmap, (list, tuple)): - warnings.warn( + raise ValueError( "Specifying a list of colors in cmap is deprecated. " - "Use colors keyword instead.", - DeprecationWarning, - stacklevel=3, + "Use colors keyword instead." ) cmap_kwargs = { diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index c9b041b3ba7..020a49b0114 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1320,8 +1320,8 @@ def test_cmap_and_color_both(self): with pytest.raises(ValueError): self.plotmethod(colors="k", cmap="RdBu") - def list_of_colors_in_cmap_deprecated(self): - with pytest.raises(Exception): + def list_of_colors_in_cmap_raises_error(self): + with raises_regex(ValueError, "list of colors"): self.plotmethod(cmap=["k", "b"]) @pytest.mark.slow From 99a5adc1ff65def531ccd55ca5a653dcd5bb4b47 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Sep 2019 17:22:57 +0000 Subject: [PATCH 15/19] Deprecation: groupby, resample default dim. (#3313) * Deprecation: groupby, resample default dim. * fix whats-new * found another test to fix. 
--- doc/whats-new.rst | 3 + xarray/core/dataset.py | 6 +- xarray/core/groupby.py | 109 ++------------------------------- xarray/core/resample.py | 5 +- xarray/tests/test_dataarray.py | 12 +--- xarray/tests/test_dataset.py | 12 ---- xarray/tests/test_groupby.py | 10 +-- 7 files changed, 17 insertions(+), 140 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 567e74052d5..57da2910d6e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,9 @@ Breaking changes error in a later release. (:issue:`3250`) by `Guido Imperiale `_. +- The default dimension for :py:meth:`~xarray.Dataset.groupby`, :py:meth:`~xarray.Dataset.resample`, + :py:meth:`~xarray.DataArray.groupby` and :py:meth:`~xarray.DataArray.resample` reductions is now the + grouping or resampling dimension. - :py:meth:`~Dataset.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous positional arguments were deprecated) - Reindexing with variables of a different dimension now raise an error (previously deprecated) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8a53e7ba757..693e94e22dd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3875,9 +3875,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if dim is ALL_DIMS: - dim = None - if dim is None: + if dim is None or dim is ALL_DIMS: dims = set(self.dims) elif isinstance(dim, str) or not isinstance(dim, Iterable): dims = {dim} @@ -4803,7 +4801,7 @@ def quantile( if isinstance(dim, str): dims = {dim} - elif dim is None: + elif dim is None or dim is ALL_DIMS: dims = set(self.dims) else: dims = set(dim) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 41de4846e81..bae3057aabe 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -5,9 +5,9 @@ import numpy as np import pandas as pd -from . import dtypes, duck_array_ops, nputils, ops, utils +from . import dtypes, duck_array_ops, nputils, ops from .arithmetic import SupportsArithmetic -from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce +from .common import ImplementsArrayReduce, ImplementsDatasetReduce from .concat import concat from .options import _get_keep_attrs from .pycompat import integer_types @@ -700,19 +700,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile """ - if dim == DEFAULT_DIMS: - dim = ALL_DIMS - # TODO change this to dim = self._group_dim after - # the deprecation process - if self._obj.ndim > 1: - warnings.warn( - "Default reduction dimension will be changed to the " - "grouped dimension in a future version of xarray. To " - "silence this warning, pass dim=xarray.ALL_DIMS " - "explicitly.", - FutureWarning, - stacklevel=2, - ) + if dim is None: + dim = self._group_dim out = self.apply( self._obj.__class__.quantile, @@ -758,20 +747,6 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. """ - if dim == DEFAULT_DIMS: - dim = ALL_DIMS - # TODO change this to dim = self._group_dim after - # the deprecation process - if self._obj.ndim > 1: - warnings.warn( - "Default reduction dimension will be changed to the " - "grouped dimension in a future version of xarray. 
To " - "silence this warning, pass dim=xarray.ALL_DIMS " - "explicitly.", - FutureWarning, - stacklevel=2, - ) - if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -780,43 +755,6 @@ def reduce_array(ar): return self.apply(reduce_array, shortcut=shortcut) - # TODO remove the following class method and DEFAULT_DIMS after the - # deprecation cycle - @classmethod - def _reduce_method(cls, func, include_skipna, numeric_only): - if include_skipna: - - def wrapped_func( - self, - dim=DEFAULT_DIMS, - axis=None, - skipna=None, - keep_attrs=None, - **kwargs - ): - return self.reduce( - func, - dim, - axis, - keep_attrs=keep_attrs, - skipna=skipna, - allow_lazy=True, - **kwargs - ) - - else: - - def wrapped_func( # type: ignore - self, dim=DEFAULT_DIMS, axis=None, keep_attrs=None, **kwargs - ): - return self.reduce( - func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs - ) - - return wrapped_func - - -DEFAULT_DIMS = utils.ReprObject("") ops.inject_reduce_methods(DataArrayGroupBy) ops.inject_binary_ops(DataArrayGroupBy) @@ -898,19 +836,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ - if dim == DEFAULT_DIMS: - dim = ALL_DIMS - # TODO change this to dim = self._group_dim after - # the deprecation process. Do not forget to remove _reduce_method - warnings.warn( - "Default reduction dimension will be changed to the " - "grouped dimension in a future version of xarray. To " - "silence this warning, pass dim=xarray.ALL_DIMS " - "explicitly.", - FutureWarning, - stacklevel=2, - ) - elif dim is None: + if dim is None: dim = self._group_dim if keep_attrs is None: @@ -921,31 +847,6 @@ def reduce_dataset(ds): return self.apply(reduce_dataset) - # TODO remove the following class method and DEFAULT_DIMS after the - # deprecation cycle - @classmethod - def _reduce_method(cls, func, include_skipna, numeric_only): - if include_skipna: - - def wrapped_func(self, dim=DEFAULT_DIMS, skipna=None, **kwargs): - return self.reduce( - func, - dim, - skipna=skipna, - numeric_only=numeric_only, - allow_lazy=True, - **kwargs - ) - - else: - - def wrapped_func(self, dim=DEFAULT_DIMS, **kwargs): # type: ignore - return self.reduce( - func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs - ) - - return wrapped_func - def assign(self, **kwargs): """Assign data variables by group. diff --git a/xarray/core/resample.py b/xarray/core/resample.py index de70ebb6950..1f2e5c0be43 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,5 +1,5 @@ from . import ops -from .groupby import DEFAULT_DIMS, DataArrayGroupBy, DatasetGroupBy +from .groupby import DataArrayGroupBy, DatasetGroupBy RESAMPLE_DIM = "__resample_dim__" @@ -307,9 +307,6 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. 
""" - if dim == DEFAULT_DIMS: - dim = None - return super().reduce(func, dim, keep_attrs, **kwargs) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 49980c75b15..01e92bdd7be 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2499,16 +2499,6 @@ def test_groupby_sum(self): assert_allclose(expected_sum_axis1, grouped.reduce(np.sum, "y")) assert_allclose(expected_sum_axis1, grouped.sum("y")) - def test_groupby_warning(self): - array = self.make_groupby_example_array() - grouped = array.groupby("y") - with pytest.warns(FutureWarning): - grouped.sum() - - @pytest.mark.skipif( - LooseVersion(xr.__version__) < LooseVersion("0.13"), - reason="not to forget the behavior change", - ) def test_groupby_sum_default(self): array = self.make_groupby_example_array() grouped = array.groupby("abc") @@ -2529,7 +2519,7 @@ def test_groupby_sum_default(self): } )["foo"] - assert_allclose(expected_sum_all, grouped.sum()) + assert_allclose(expected_sum_all, grouped.sum(dim="y")) def test_groupby_count(self): array = DataArray( diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d8401e0bd42..7d2b11d02c9 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3367,18 +3367,6 @@ def test_groupby_reduce(self): actual = data.groupby("letters").mean(ALL_DIMS) assert_allclose(expected, actual) - def test_groupby_warn(self): - data = Dataset( - { - "xy": (["x", "y"], np.random.randn(3, 4)), - "xonly": ("x", np.random.randn(3)), - "yonly": ("y", np.random.randn(4)), - "letters": ("y", ["a", "a", "b", "b"]), - } - ) - with pytest.warns(FutureWarning): - data.groupby("x").mean() - def test_groupby_math(self): def reorder_dims(x): return x.transpose("dim1", "dim2", "dim3", "time") diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 9127eb71cb7..ee17cc39064 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -134,21 +134,21 @@ def test_da_groupby_quantile(): [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], ) - actual_x = array.groupby("x").quantile(0) + actual_x = array.groupby("x").quantile(0, dim=xr.ALL_DIMS) expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) assert_identical(expected_x, actual_x) - actual_y = array.groupby("y").quantile(0) + actual_y = array.groupby("y").quantile(0, dim=xr.ALL_DIMS) expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) assert_identical(expected_y, actual_y) - actual_xx = array.groupby("x").quantile(0, dim="x") + actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] ) assert_identical(expected_xx, actual_xx) - actual_yy = array.groupby("y").quantile(0, dim="y") + actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], @@ -164,7 +164,7 @@ def test_da_groupby_quantile(): ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0) + actual = g.quantile(0, dim=xr.ALL_DIMS) expected = xr.DataArray( [ 0.0, From 9fbe353d55feddb1cc42a8957171a07b23dd403f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Sep 2019 18:50:04 +0000 Subject: [PATCH 16/19] auto_combine deprecation to 0.14 (#3314) --- xarray/backends/api.py | 2 +- xarray/core/combine.py | 4 ++-- xarray/tests/test_combine.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 1f0869cfc53..0d6dedac57e 100644 --- 
a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -915,7 +915,7 @@ def open_mfdataset( # Remove this after deprecation cycle from #2616 is complete basic_msg = dedent( """\ - In xarray version 0.13 the default behaviour of `open_mfdataset` + In xarray version 0.14 the default behaviour of `open_mfdataset` will change. To retain the existing behavior, pass combine='nested'. To use future default behavior, pass combine='by_coords'. See diff --git a/xarray/core/combine.py b/xarray/core/combine.py index e35bb51e030..be7fd86555c 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -716,7 +716,7 @@ def auto_combine( if not from_openmfds: basic_msg = dedent( """\ - In xarray version 0.13 `auto_combine` will be deprecated. See + In xarray version 0.14 `auto_combine` will be deprecated. See http://xarray.pydata.org/en/stable/combining.html#combining-multi""" ) warnings.warn(basic_msg, FutureWarning, stacklevel=2) @@ -758,7 +758,7 @@ def auto_combine( message += dedent( """\ The datasets supplied require both concatenation and merging. From - xarray version 0.13 this will operation will require either using the + xarray version 0.14 this will operation will require either using the new `combine_nested` function (or the `combine='nested'` option to open_mfdataset), with a nested list structure such that you can combine along the dimensions {}. Alternatively if your datasets have global diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 1abca30d199..6037669ac07 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -714,7 +714,7 @@ def test_check_for_impossible_ordering(self): @pytest.mark.filterwarnings( - "ignore:In xarray version 0.13 `auto_combine` " "will be deprecated" + "ignore:In xarray version 0.14 `auto_combine` " "will be deprecated" ) @pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer") @pytest.mark.filterwarnings("ignore:The datasets supplied") From 5b727951b60f7fa7c096b5f0a6583aad3bff11dd Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 17 Sep 2019 12:57:50 -0600 Subject: [PATCH 17/19] Release v0.13.0 --- doc/api.rst | 1 + doc/whats-new.rst | 46 ++++++++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 699687441d7..9b3d6dfaf95 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -610,6 +610,7 @@ Plotting Dataset.plot DataArray.plot + Dataset.plot.scatter plot.plot plot.contourf plot.contour diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 57da2910d6e..c40f7fc64ed 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,15 +15,20 @@ What's New .. _whats-new.0.13.0: -v0.13.0 (unreleased) --------------------- +v0.13.0 (10 July 2019) +---------------------- -This release increases the minimum required Python version from 3.5.0 to 3.5.3 -(:issue:`3089`). By `Guido Imperiale `_. +This release includes many exciting changes: wrapping of +`NEP18 `_ compliant +numpy-like arrays; new :py:meth:`~Dataset.plot.scatter` method that can scatter +two ``DataArrays`` in a ``Dataset`` against each other; support for converting pandas +DataFrames to xarray objects that wrap ``pydata/sparse``; and more! Breaking changes ~~~~~~~~~~~~~~~~ +- This release increases the minimum required Python version from 3.5.0 to 3.5.3 + (:issue:`3089`). By `Guido Imperiale `_. - The ``isel_points`` and ``sel_points`` methods are removed, having been deprecated since v0.10.0. These are redundant with the ``isel`` / ``sel`` methods. 
See :ref:`vectorized_indexing` for the details @@ -54,16 +59,16 @@ Breaking changes error in a later release. (:issue:`3250`) by `Guido Imperiale `_. -- The default dimension for :py:meth:`~xarray.Dataset.groupby`, :py:meth:`~xarray.Dataset.resample`, - :py:meth:`~xarray.DataArray.groupby` and :py:meth:`~xarray.DataArray.resample` reductions is now the +- The default dimension for :py:meth:`Dataset.groupby`, :py:meth:`Dataset.resample`, + :py:meth:`DataArray.groupby` and :py:meth:`DataArray.resample` reductions is now the grouping or resampling dimension. -- :py:meth:`~Dataset.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous +- :py:meth:`DataArray.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous positional arguments were deprecated) - Reindexing with variables of a different dimension now raise an error (previously deprecated) - :py:func:`~xarray.broadcast_array` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) -- :py:meth:`~Variable.expand_dims` is removed (previously deprecated in favor of - :py:meth:`~Variable.set_dims`) +- :py:meth:`Variable.expand_dims` is removed (previously deprecated in favor of + :py:meth:`Variable.set_dims`) New functions/methods ~~~~~~~~~~~~~~~~~~~~~ @@ -102,13 +107,15 @@ New functions/methods Currently only :py:meth:`Dataset.plot.scatter` is implemented. By `Yohai Bar Sinai `_ and `Deepak Cherian `_ -- Added `head`, `tail` and `thin` methods to `Dataset` and `DataArray`. (:issue:`319`) - By `Gerardo Rivera `_. +- Added :py:meth:`DataArray.head`, :py:meth:`DataArray.tail` and :py:meth:`DataArray.thin`; + as well as :py:meth:`Dataset.head`, :py:meth:`Dataset.tail` and :py:meth:`Dataset.thin` methods. + (:issue:`319`) By `Gerardo Rivera `_. Enhancements ~~~~~~~~~~~~ - Multiple enhancements to :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset`. + By `Deepak Cherian `_ - Added ``compat='override'``. When merging, this option picks the variable from the first dataset and skips all comparisons. @@ -131,8 +138,6 @@ Enhancements data = xr.open_mfdataset(files, concat_dim='time', data_vars='minimal', coords='minimal', compat='override', join='override') - By `Deepak Cherian `_: - - In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. By `David Brochart `_. @@ -156,7 +161,8 @@ Enhancements when the user passes invalid arguments (:issue:`3176`). By `Gregory Gundersen `_. -- :py:func:`filter_by_attrs` now filters the coordinates as well as the variables. By `Spencer Jones `_. +- :py:func:`filter_by_attrs` now filters the coordinates as well as the variables. + By `Spencer Jones `_. Bug fixes ~~~~~~~~~ @@ -195,9 +201,7 @@ Bug fixes - Plots in 2 dimensions (pcolormesh, contour) now allow to specify levels as numpy array (:issue:`3284`). By `Mathias Hauser `_. - Fixed bug in :meth:`DataArray.quantile` failing to keep attributes when - `keep_attrs` was True (:issue:`3304`). By David Huard `_. - -.. _whats-new.0.12.3: + `keep_attrs` was True (:issue:`3304`). By David Huard ``_. Documentation ~~~~~~~~~~~~~ @@ -210,6 +214,8 @@ Documentation (:issue:`3227`). By `Gregory Gundersen `_. +.. _whats-new.0.12.3: + v0.12.3 (10 July 2019) ---------------------- @@ -224,14 +230,14 @@ New functions/methods as described in :ref:`reshape.stacking_different`. By `Noah Brenowitz `_. 
+Enhancements +~~~~~~~~~~~~ + - Support for renaming ``Dataset`` variables and dimensions independently with :py:meth:`~Dataset.rename_vars` and :py:meth:`~Dataset.rename_dims` (:issue:`3026`). By `Julia Kent `_. -Enhancements -~~~~~~~~~~~~ - - Add ``scales``, ``offsets``, ``units`` and ``descriptions`` attributes to :py:class:`~xarray.DataArray` returned by :py:func:`~xarray.open_rasterio`. (:issue:`3013`) From 3814f3cdbab6da968f51767c4e67d18bdf077761 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 17 Sep 2019 13:35:24 -0600 Subject: [PATCH 18/19] Revert to dev version --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c40f7fc64ed..63e0d34523c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,6 +13,11 @@ What's New import xarray as xr np.random.seed(123456) +.. _whats-new.0.13.1: + +v0.13.1 (unreleased) +-------------------- + .. _whats-new.0.13.0: v0.13.0 (10 July 2019) From 02e96618ccdc13d6fa7165278b934ab204dfeef2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 17 Sep 2019 13:42:37 -0600 Subject: [PATCH 19/19] Fix whats-new date :/ --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 63e0d34523c..d50b2d53f92 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -20,8 +20,8 @@ v0.13.1 (unreleased) .. _whats-new.0.13.0: -v0.13.0 (10 July 2019) ----------------------- +v0.13.0 (17 Sep 2019) +--------------------- This release includes many exciting changes: wrapping of `NEP18 `_ compliant