From 3666a6fb7226601002f408d56e81837ab69b8d4e Mon Sep 17 00:00:00 2001 From: Ant Gib <57914115+antscloud@users.noreply.github.com> Date: Sat, 15 Jan 2022 18:27:56 +0100 Subject: [PATCH 01/68] Fix wrong typing for tolerance in reindex (#6037) --- doc/whats-new.rst | 3 ++- xarray/core/alignment.py | 7 ++++++- xarray/core/dataarray.py | 12 ++++++++++-- xarray/core/dataset.py | 14 +++++++++++--- xarray/tests/test_dataarray.py | 6 +++++- xarray/tests/test_dataset.py | 10 +++++++++- 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 73cc15b50ff..bbccc95a80d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -121,7 +121,8 @@ Documentation By `Deepak Cherian `_, `Maximilian Roos `_, `Jimmy Westling `_ . - +- Add list-like possibility for tolerance parameter in the reindex functions. + By `Antoine Gibek `_, Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index a53ac094253..f9342e2a82a 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -7,6 +7,7 @@ Any, Dict, Hashable, + Iterable, Mapping, Optional, Tuple, @@ -504,7 +505,7 @@ def reindex_variables( indexes: Mapping[Any, Index], indexers: Mapping, method: Optional[str] = None, - tolerance: Any = None, + tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, copy: bool = True, fill_value: Optional[Any] = dtypes.NA, sparse: bool = False, @@ -538,6 +539,10 @@ def reindex_variables( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return values is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 105271cef61..7f29d3b6320 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1468,7 +1468,7 @@ def reindex_like( self, other: Union["DataArray", Dataset], method: str = None, - tolerance=None, + tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, copy: bool = True, fill_value=dtypes.NA, ) -> "DataArray": @@ -1496,6 +1496,10 @@ def reindex_like( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -1530,7 +1534,7 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance=None, + tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, copy: bool = True, fill_value=dtypes.NA, **indexers_kwargs: Any, @@ -1563,6 +1567,10 @@ def reindex( Maximum distance between original and new labels for inexact matches. 
The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill values. Use this diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3d05d56492b..4e8001ca389 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2690,7 +2690,7 @@ def reindex_like( self, other: Union["Dataset", "DataArray"], method: str = None, - tolerance: Number = None, + tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, copy: bool = True, fill_value: Any = dtypes.NA, ) -> "Dataset": @@ -2718,6 +2718,10 @@ def reindex_like( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -2751,7 +2755,7 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Number = None, + tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, copy: bool = True, fill_value: Any = dtypes.NA, **indexers_kwargs: Any, @@ -2779,6 +2783,10 @@ def reindex( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return value is always copied. 
If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -2961,7 +2969,7 @@ def _reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Number = None, + tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 72ccc80bd06..f1945b0e224 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1528,13 +1528,17 @@ def test_reindex_regressions(self): re_dtype = x.reindex_like(y, method="pad").dtype assert x.dtype == re_dtype - def test_reindex_method(self): + def test_reindex_method(self) -> None: x = DataArray([10, 20], dims="y", coords={"y": [0, 1]}) y = [-0.1, 0.5, 1.1] actual = x.reindex(y=y, method="backfill", tolerance=0.2) expected = DataArray([10, np.nan, np.nan], coords=[("y", y)]) assert_identical(expected, actual) + actual = x.reindex(y=y, method="backfill", tolerance=[0.1, 0.1, 0.01]) + expected = DataArray([10, np.nan, np.nan], coords=[("y", y)]) + assert_identical(expected, actual) + alt = Dataset({"y": y}) actual = x.reindex_like(alt, method="backfill") expected = DataArray([10, 20, np.nan], coords=[("y", y)]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c8770601c30..c0c1f2224cf 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1883,7 +1883,7 @@ def test_reindex_variables_copied(self): for k in data.variables: assert reindexed_data.variables[k] is not data.variables[k] - def test_reindex_method(self): + def test_reindex_method(self) -> None: ds = Dataset({"x": ("y", [10, 20]), "y": [0, 1]}) y = [-0.5, 0.5, 1.5] actual = ds.reindex(y=y, method="backfill") @@ -1894,6 +1894,14 @@ def test_reindex_method(self): expected = Dataset({"x": ("y", 3 * [np.nan]), "y": y}) assert_identical(expected, actual) + actual = ds.reindex(y=y, method="backfill", tolerance=[0.1, 0.5, 0.1]) + expected = Dataset({"x": ("y", [np.nan, 20, np.nan]), "y": y}) + assert_identical(expected, actual) + + actual = ds.reindex(y=[0.1, 0.1, 1], tolerance=[0, 0.1, 0], method="nearest") + expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": [0.1, 0.1, 1]}) + assert_identical(expected, actual) + actual = ds.reindex(y=y, method="pad") expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": y}) assert_identical(expected, actual) From 8c0166cba410eb96c18efbc34d7d8af8e448df6e Mon Sep 17 00:00:00 2001 From: Michael Delgado Date: Tue, 18 Jan 2022 14:45:38 -0800 Subject: [PATCH 02/68] allow 1 non-null value in interpolate_na with method="nearest" (#6144) * allow 1 non-null value in interpolate_na with method="nearest" * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add PR reference to whatsnew Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mathias Hauser --- doc/whats-new.rst | 5 ++++- xarray/core/missing.py | 4 ++-- xarray/tests/test_missing.py | 33 ++++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bbccc95a80d..cb0e9b654bd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,7 +32,7 @@ Breaking changes - Rely on matplotlib's default datetime converters instead of pandas' (:issue:`6102`, :pull:`6109`). By `Jimmy Westling `_. 
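A minimal sketch of the list-like ``tolerance`` behaviour added by the first patch in this series; the values mirror the new ``test_reindex_method`` case rather than any additional API:

import xarray as xr

x = xr.DataArray([10, 20], dims="y", coords={"y": [0, 1]})
y = [-0.1, 0.5, 1.1]

# Scalar tolerance applies the same cutoff to every target label.
scalar = x.reindex(y=y, method="backfill", tolerance=0.2)

# List-like tolerance gives a per-label cutoff; it must have the same
# length as the target index.
per_label = x.reindex(y=y, method="backfill", tolerance=[0.1, 0.1, 0.01])

# For these inputs both results are [10, nan, nan], as asserted in the test.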
- Improve repr readability when there are a large number of dimensions in datasets or dataarrays by - wrapping the text once the maximum display width has been exceeded. (:issue: `5546`, :pull:`5662`) + wrapping the text once the maximum display width has been exceeded. (:issue:`5546`, :pull:`5662`) By `Jimmy Westling `_. @@ -57,6 +57,9 @@ Bug fixes - Fix applying function with non-xarray arguments using :py:func:`xr.map_blocks`. By `Cindy Chiao `_. +- No longer raise an error for an all-nan-but-one argument to + :py:meth:`DataArray.interpolate_na` when using `method='nearest'` (:issue:`5994`, :pull:`6144`). + By `Michael Delgado `_. - `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 6749e5294f0..acfbb032c23 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -386,9 +386,9 @@ def func_interpolate_na(interpolator, y, x, **kwargs): nans = pd.isnull(y) nonans = ~nans - # fast track for no-nans and all-nans cases + # fast track for no-nans, all nan but one, and all-nans cases n_nans = nans.sum() - if n_nans == 0 or n_nans == len(y): + if n_nans == 0 or n_nans >= len(y) - 1: return y f = interpolator(x[nonans], y[nonans], **kwargs) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 4121b62a9e8..2bf5af31fa5 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -255,19 +255,30 @@ def test_interpolate(): assert_equal(actual, expected) -def test_interpolate_nonans(): - - vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) - expected = xr.DataArray(vals, dims="x") - actual = expected.interpolate_na(dim="x") - assert_equal(actual, expected) - - @requires_scipy -def test_interpolate_allnans(): - vals = np.full(6, np.nan, dtype=np.float64) +@pytest.mark.parametrize( + "method,vals", + [ + pytest.param(method, vals, id=f"{desc}:{method}") + for method in [ + "linear", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "polynomial", + ] + for (desc, vals) in [ + ("no nans", np.array([1, 2, 3, 4, 5, 6], dtype=np.float64)), + ("one nan", np.array([1, np.nan, np.nan], dtype=np.float64)), + ("all nans", np.full(6, np.nan, dtype=np.float64)), + ] + ], +) +def test_interp1d_fastrack(method, vals): expected = xr.DataArray(vals, dims="x") - actual = expected.interpolate_na(dim="x") + actual = expected.interpolate_na(dim="x", method=method) assert_equal(actual, expected) From d3b6aa6d8b997df115a53c001d00222a0f92f63a Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 19 Jan 2022 00:39:12 +0100 Subject: [PATCH 03/68] unpin dask again (#6171) --- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index a9074b6c949..8dafb6f80f6 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -8,7 +8,7 @@ dependencies: # - cdms2 # Not available on Windows # - cfgrib # Causes Python interpreter crash on Windows: https://github.com/pydata/xarray/pull/3340 - cftime - - dask-core != 2021.12.0 # https://github.com/pydata/xarray/pull/6111, can remove on next release + - dask-core - distributed - fsspec!=2021.7.0 - h5netcdf diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 890220b54fb..eab06fbe0f8 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ 
-10,7 +10,7 @@ dependencies: - cdms2 - cfgrib - cftime - - dask-core != 2021.12.0 # https://github.com/pydata/xarray/pull/6111, can remove on next release + - dask-core - distributed - fsspec!=2021.7.0 - h5netcdf From 84961e6a2b30f495ddc55c4024f105a3f89e6243 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 19 Jan 2022 20:35:40 +0100 Subject: [PATCH 04/68] keep attrs in xarray.where (#4687) Co-authored-by: Deepak Cherian Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 3 ++- xarray/core/computation.py | 13 ++++++++++++- xarray/tests/test_computation.py | 9 +++++++++ xarray/tests/test_units.py | 5 +---- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cb0e9b654bd..89040c6dc5b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,8 @@ New Features ~~~~~~~~~~~~ - New top-level function :py:func:`cross`. (:issue:`3279`, :pull:`5365`). By `Jimmy Westling `_. - +- ``keep_attrs`` support for :py:func:`where` (:issue:`4141`, :issue:`4682`, :pull:`4687`). + By `Justus Magin `_. - Enable the limit option for dask array in the following methods :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` (:issue:`6112`) By `Joseph Nowak `_. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 9fe93c88734..5e6340feed2 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1727,7 +1727,7 @@ def dot(*arrays, dims=None, **kwargs): return result.transpose(*all_dims, missing_dims="ignore") -def where(cond, x, y): +def where(cond, x, y, keep_attrs=None): """Return elements from `x` or `y` depending on `cond`. Performs xarray-like broadcasting across input arguments. @@ -1743,6 +1743,8 @@ def where(cond, x, y): values to choose from where `cond` is True y : scalar, array, Variable, DataArray or Dataset values to choose from where `cond` is False + keep_attrs : bool or str or callable, optional + How to treat attrs. If True, keep the attrs of `x`. 
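A minimal sketch of the ``keep_attrs`` behaviour this docstring describes, mirroring the new ``test_where_attrs`` test further down in the patch:

import xarray as xr

cond = xr.DataArray([True, False], dims="x", attrs={"attr": "cond"})
x = xr.DataArray([1, 1], dims="x", attrs={"attr": "x"})
y = xr.DataArray([0, 0], dims="x", attrs={"attr": "y"})

# With keep_attrs=True the result keeps the attrs of x (the second
# argument), consistent with DataArray.where / Dataset.where.
result = xr.where(cond, x, y, keep_attrs=True)
assert result.attrs == {"attr": "x"}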
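And a short sketch of the ``interpolate_na`` fast track from the earlier patch in this series: with all values NaN but one there is nothing to interpolate, so the data comes back unchanged instead of raising. The values are taken from the new parametrized test; ``method="nearest"`` requires scipy:

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([1, np.nan, np.nan], dtype=np.float64), dims="x")

# Previously this raised inside scipy's interp1d; with the fast track
# (n_nans >= len(y) - 1) the array is returned unchanged.
out = da.interpolate_na(dim="x", method="nearest")
assert out.equals(da)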
Returns ------- @@ -1808,6 +1810,14 @@ def where(cond, x, y): Dataset.where, DataArray.where : equivalent methods """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + + if keep_attrs is True: + # keep the attributes of x, the second parameter, by default to + # be consistent with the `where` method of `DataArray` and `Dataset` + keep_attrs = lambda attrs, context: attrs[1] + # alignment for three arguments is complicated, so don't support it yet return apply_ufunc( duck_array_ops.where, @@ -1817,6 +1827,7 @@ def where(cond, x, y): join="exact", dataset_join="exact", dask="allowed", + keep_attrs=keep_attrs, ) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index c9a10b7cc43..a51bfb03641 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1922,6 +1922,15 @@ def test_where() -> None: assert_identical(expected, actual) +def test_where_attrs() -> None: + cond = xr.DataArray([True, False], dims="x", attrs={"attr": "cond"}) + x = xr.DataArray([1, 1], dims="x", attrs={"attr": "x"}) + y = xr.DataArray([0, 0], dims="x", attrs={"attr": "y"}) + actual = xr.where(cond, x, y, keep_attrs=True) + expected = xr.DataArray([1, 0], dims="x", attrs={"attr": "x"}) + assert_identical(expected, actual) + + @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize("use_datetime", [True, False]) def test_polyval(use_dask, use_datetime) -> None: diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index f36143c52c3..1225ecde5fb 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2429,10 +2429,7 @@ def test_binary_operations(self, func, dtype): ( pytest.param(operator.lt, id="less_than"), pytest.param(operator.ge, id="greater_equal"), - pytest.param( - operator.eq, - id="equal", - ), + pytest.param(operator.eq, id="equal"), ), ) @pytest.mark.parametrize( From 176c6ebc5366cf5e98657c8f8ada324e96c5db1c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 19 Jan 2022 12:39:39 -0800 Subject: [PATCH 05/68] Add pyupgrade onto pre-commit (#6152) * Add pyupgrade onto pre-commit * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update doc/whats-new.rst Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * . 
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- .pre-commit-config.yaml | 24 +- doc/whats-new.rst | 2 + xarray/backends/plugins.py | 6 +- xarray/coding/cftime_offsets.py | 12 +- xarray/coding/strings.py | 4 +- xarray/coding/times.py | 8 +- xarray/core/common.py | 63 ++--- xarray/core/computation.py | 59 ++-- xarray/core/concat.py | 78 +++--- xarray/core/dataarray.py | 340 ++++++++++++----------- xarray/core/dataset.py | 467 ++++++++++++++++---------------- xarray/core/formatting.py | 4 +- xarray/core/formatting_html.py | 4 +- xarray/core/indexes.py | 2 +- xarray/core/indexing.py | 6 +- xarray/core/merge.py | 103 ++++--- xarray/core/missing.py | 2 +- xarray/core/parallel.py | 24 +- xarray/core/rolling.py | 4 +- xarray/core/rolling_exp.py | 4 +- xarray/core/utils.py | 6 +- xarray/core/variable.py | 75 +++-- xarray/plot/dataset_plot.py | 4 +- xarray/testing.py | 6 +- xarray/tests/test_backends.py | 10 +- xarray/tests/test_concat.py | 1 - xarray/tests/test_plugins.py | 2 +- xarray/tests/test_variable.py | 2 +- xarray/ufuncs.py | 4 +- 29 files changed, 631 insertions(+), 695 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ecc69e5783a..5eb2a244ee5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,11 +6,24 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - # isort should run before black as black sometimes tweaks the isort output + - id: debug-statements + - id: mixed-line-ending + # This wants to go before isort & flake8 + - repo: https://github.com/myint/autoflake + rev: "v1.4" + hooks: + - id: autoflake # isort should run before black as black sometimes tweaks the isort output + args: ["--in-place", "--ignore-init-module-imports"] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort + - repo: https://github.com/asottile/pyupgrade + rev: v2.31.0 + hooks: + - id: pyupgrade + args: + - "--py37-plus" # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black rev: 21.12b0 @@ -47,12 +60,3 @@ repos: typing-extensions==3.10.0.0, numpy, ] - # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194 - # - repo: https://github.com/asottile/pyupgrade - # rev: v1.22.1 - # hooks: - # - id: pyupgrade - # args: - # - "--py3-only" - # # remove on f-strings in Py3.7 - # - "--keep-percent-format" diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 89040c6dc5b..8896dd62379 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -78,6 +78,8 @@ Internal Changes - Removed internal checks for ``pd.Panel`` (:issue:`6145`). By `Matthew Roeschke `_. +- Add ``pyupgrade`` pre-commit hook (:pull:`6152`). + By `Maximilian Roos `_. .. 
_whats-new.0.20.2: diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 0a9ffcbda22..f03782321e7 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -168,10 +168,8 @@ def get_backend(engine): backend = engine() else: raise TypeError( - ( - "engine must be a string or a subclass of " - f"xarray.backends.BackendEntrypoint: {engine}" - ) + "engine must be a string or a subclass of " + f"xarray.backends.BackendEntrypoint: {engine}" ) return backend diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 2db6d4e8097..6557590dbb8 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -160,7 +160,7 @@ def rollback(self, date): return date - type(self)() def __str__(self): - return "<{}: n={}>".format(type(self).__name__, self.n) + return f"<{type(self).__name__}: n={self.n}>" def __repr__(self): return str(self) @@ -399,10 +399,10 @@ def __mul__(self, other): return type(self)(n=other * self.n, month=self.month) def rule_code(self): - return "{}-{}".format(self._freq, _MONTH_ABBREVIATIONS[self.month]) + return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}" def __str__(self): - return "<{}: n={}, month={}>".format(type(self).__name__, self.n, self.month) + return f"<{type(self).__name__}: n={self.n}, month={self.month}>" class QuarterBegin(QuarterOffset): @@ -485,10 +485,10 @@ def __mul__(self, other): return type(self)(n=other * self.n, month=self.month) def rule_code(self): - return "{}-{}".format(self._freq, _MONTH_ABBREVIATIONS[self.month]) + return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}" def __str__(self): - return "<{}: n={}, month={}>".format(type(self).__name__, self.n, self.month) + return f"<{type(self).__name__}: n={self.n}, month={self.month}>" class YearBegin(YearOffset): @@ -741,7 +741,7 @@ def _generate_linear_range(start, end, periods): total_seconds = (end - start).total_seconds() values = np.linspace(0.0, total_seconds, periods, endpoint=True) - units = "seconds since {}".format(format_cftime_datetime(start)) + units = f"seconds since {format_cftime_datetime(start)}" calendar = start.calendar return cftime.num2date( values, units=units, calendar=calendar, only_use_cftime_datetimes=True diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index aeffab0c2d7..e4b1e906160 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -18,7 +18,7 @@ def create_vlen_dtype(element_type): if element_type not in (str, bytes): - raise TypeError("unsupported type for vlen_dtype: {!r}".format(element_type)) + raise TypeError(f"unsupported type for vlen_dtype: {element_type!r}") # based on h5py.special_dtype return np.dtype("O", metadata={"element_type": element_type}) @@ -227,7 +227,7 @@ def shape(self): return self.array.shape[:-1] def __repr__(self): - return "{}({!r})".format(type(self).__name__, self.array) + return f"{type(self).__name__}({self.array!r})" def __getitem__(self, key): # require slicing the last dimension completely diff --git a/xarray/coding/times.py b/xarray/coding/times.py index c89b0c100cd..0eb8707f0cc 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -131,8 +131,8 @@ def _ensure_padded_year(ref_date): matches_start_digits = re.match(r"(\d+)(.*)", ref_date) if not matches_start_digits: raise ValueError(f"invalid reference date for time units: {ref_date}") - ref_year, everything_else = [s for s in matches_start_digits.groups()] - ref_date_padded = "{:04d}{}".format(int(ref_year), everything_else) + ref_year, 
everything_else = (s for s in matches_start_digits.groups()) + ref_date_padded = f"{int(ref_year):04d}{everything_else}" warning_msg = ( f"Ambiguous reference date string: {ref_date}. The first value is " @@ -155,7 +155,7 @@ def _unpack_netcdf_time_units(units): if not matches: raise ValueError(f"invalid time units: {units}") - delta_units, ref_date = [s.strip() for s in matches.groups()] + delta_units, ref_date = (s.strip() for s in matches.groups()) ref_date = _ensure_padded_year(ref_date) return delta_units, ref_date @@ -545,7 +545,7 @@ def _should_cftime_be_used(source, target_calendar, use_cftime): def _cleanup_netcdf_time_units(units): delta, ref_date = _unpack_netcdf_time_units(units) try: - units = "{} since {}".format(delta, format_timestamp(ref_date)) + units = f"{delta} since {format_timestamp(ref_date)}" except (OutOfBoundsDatetime, ValueError): # don't worry about reifying the units if they're out of bounds or # formatted badly diff --git a/xarray/core/common.py b/xarray/core/common.py index b5dc3bf0e20..039b03aec56 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -8,16 +8,11 @@ TYPE_CHECKING, Any, Callable, - Dict, Hashable, Iterable, Iterator, - List, Mapping, - Optional, - Tuple, TypeVar, - Union, overload, ) @@ -164,9 +159,7 @@ def __iter__(self: Any) -> Iterator[Any]: raise TypeError("iteration over a 0-d array") return self._iter() - def get_axis_num( - self, dim: Union[Hashable, Iterable[Hashable]] - ) -> Union[int, Tuple[int, ...]]: + def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]: """Return axis number(s) corresponding to dimension(s) in this array. Parameters @@ -244,7 +237,7 @@ def __getattr__(self, name: str) -> Any: with suppress(KeyError): return source[name] raise AttributeError( - "{!r} object has no attribute {!r}".format(type(self).__name__, name) + f"{type(self).__name__!r} object has no attribute {name!r}" ) # This complicated two-method design boosts overall performance of simple operations @@ -284,37 +277,37 @@ def __setattr__(self, name: str, value: Any) -> None: "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." ) from e - def __dir__(self) -> List[str]: + def __dir__(self) -> list[str]: """Provide method name lookup and completion. Only provide 'public' methods. """ - extra_attrs = set( + extra_attrs = { item for source in self._attr_sources for item in source if isinstance(item, str) - ) + } return sorted(set(dir(type(self))) | extra_attrs) - def _ipython_key_completions_(self) -> List[str]: + def _ipython_key_completions_(self) -> list[str]: """Provide method for the key-autocompletions in IPython. See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion For the details. 
""" - items = set( + items = { item for source in self._item_sources for item in source if isinstance(item, str) - ) + } return list(items) def get_squeeze_dims( xarray_obj, - dim: Union[Hashable, Iterable[Hashable], None] = None, - axis: Union[int, Iterable[int], None] = None, -) -> List[Hashable]: + dim: Hashable | Iterable[Hashable] | None = None, + axis: int | Iterable[int] | None = None, +) -> list[Hashable]: """Get a list of dimensions to squeeze out.""" if dim is not None and axis is not None: raise ValueError("cannot use both parameters `axis` and `dim`") @@ -346,15 +339,15 @@ def get_squeeze_dims( class DataWithCoords(AttrAccessMixin): """Shared base class for Dataset and DataArray.""" - _close: Optional[Callable[[], None]] + _close: Callable[[], None] | None __slots__ = ("_close",) def squeeze( self, - dim: Union[Hashable, Iterable[Hashable], None] = None, + dim: Hashable | Iterable[Hashable] | None = None, drop: bool = False, - axis: Union[int, Iterable[int], None] = None, + axis: int | Iterable[int] | None = None, ): """Return a new object with squeezed data. @@ -416,8 +409,8 @@ def get_index(self, key: Hashable) -> pd.Index: return pd.Index(range(self.sizes[key]), name=key) def _calc_assign_results( - self: C, kwargs: Mapping[Any, Union[T, Callable[[C], T]]] - ) -> Dict[Hashable, T]: + self: C, kwargs: Mapping[Any, T | Callable[[C], T]] + ) -> dict[Hashable, T]: return {k: v(self) if callable(v) else v for k, v in kwargs.items()} def assign_coords(self, coords=None, **coords_kwargs): @@ -535,7 +528,7 @@ def assign_attrs(self, *args, **kwargs): def pipe( self, - func: Union[Callable[..., T], Tuple[Callable[..., T], str]], + func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs, ) -> T: @@ -802,7 +795,7 @@ def groupby_bins( }, ) - def weighted(self: T_DataWithCoords, weights: "DataArray") -> Weighted[T_Xarray]: + def weighted(self: T_DataWithCoords, weights: DataArray) -> Weighted[T_Xarray]: """ Weighted operations. @@ -825,7 +818,7 @@ def rolling( self, dim: Mapping[Any, int] = None, min_periods: int = None, - center: Union[bool, Mapping[Any, bool]] = False, + center: bool | Mapping[Any, bool] = False, **window_kwargs: int, ): """ @@ -940,7 +933,7 @@ def coarsen( self, dim: Mapping[Any, int] = None, boundary: str = "exact", - side: Union[str, Mapping[Any, str]] = "left", + side: str | Mapping[Any, str] = "left", coord_func: str = "mean", **window_kwargs: int, ): @@ -1290,7 +1283,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): return ops.where_method(self, cond, other) - def set_close(self, close: Optional[Callable[[], None]]) -> None: + def set_close(self, close: Callable[[], None] | None) -> None: """Register the function that releases any resources linked to this object. This method controls how xarray cleans up resources associated @@ -1523,20 +1516,20 @@ def __getitem__(self, value): @overload def full_like( - other: "Dataset", + other: Dataset, fill_value, - dtype: Union[DTypeLike, Mapping[Any, DTypeLike]] = None, -) -> "Dataset": + dtype: DTypeLike | Mapping[Any, DTypeLike] = None, +) -> Dataset: ... @overload -def full_like(other: "DataArray", fill_value, dtype: DTypeLike = None) -> "DataArray": +def full_like(other: DataArray, fill_value, dtype: DTypeLike = None) -> DataArray: ... @overload -def full_like(other: "Variable", fill_value, dtype: DTypeLike = None) -> "Variable": +def full_like(other: Variable, fill_value, dtype: DTypeLike = None) -> Variable: ... 
@@ -1815,9 +1808,9 @@ def ones_like(other, dtype: DTypeLike = None): def get_chunksizes( variables: Iterable[Variable], -) -> Mapping[Any, Tuple[int, ...]]: +) -> Mapping[Any, tuple[int, ...]]: - chunks: Dict[Any, Tuple[int, ...]] = {} + chunks: dict[Any, tuple[int, ...]] = {} for v in variables: if hasattr(v.data, "chunks"): for dim, c in v.chunksizes.items(): diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 5e6340feed2..7273d25253d 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -13,15 +13,10 @@ AbstractSet, Any, Callable, - Dict, Hashable, Iterable, - List, Mapping, - Optional, Sequence, - Tuple, - Union, ) import numpy as np @@ -197,7 +192,7 @@ def result_name(objects: list) -> Any: return name -def _get_coords_list(args) -> List[Coordinates]: +def _get_coords_list(args) -> list[Coordinates]: coords_list = [] for arg in args: try: @@ -214,7 +209,7 @@ def build_output_coords( signature: _UFuncSignature, exclude_dims: AbstractSet = frozenset(), combine_attrs: str = "override", -) -> "List[Dict[Any, Variable]]": +) -> list[dict[Any, Variable]]: """Build output coordinates for an operation. Parameters @@ -309,11 +304,11 @@ def apply_dataarray_vfunc( return out -def ordered_set_union(all_keys: List[Iterable]) -> Iterable: +def ordered_set_union(all_keys: list[Iterable]) -> Iterable: return {key: None for keys in all_keys for key in keys}.keys() -def ordered_set_intersection(all_keys: List[Iterable]) -> Iterable: +def ordered_set_intersection(all_keys: list[Iterable]) -> Iterable: intersection = set(all_keys[0]) for keys in all_keys[1:]: intersection.intersection_update(keys) @@ -331,7 +326,7 @@ def assert_and_return_exact_match(all_keys): return first_keys -_JOINERS: Dict[str, Callable] = { +_JOINERS: dict[str, Callable] = { "inner": ordered_set_intersection, "outer": ordered_set_union, "left": operator.itemgetter(0), @@ -340,17 +335,15 @@ def assert_and_return_exact_match(all_keys): } -def join_dict_keys( - objects: Iterable[Union[Mapping, Any]], how: str = "inner" -) -> Iterable: +def join_dict_keys(objects: Iterable[Mapping | Any], how: str = "inner") -> Iterable: joiner = _JOINERS[how] all_keys = [obj.keys() for obj in objects if hasattr(obj, "keys")] return joiner(all_keys) def collect_dict_values( - objects: Iterable[Union[Mapping, Any]], keys: Iterable, fill_value: object = None -) -> List[list]: + objects: Iterable[Mapping | Any], keys: Iterable, fill_value: object = None +) -> list[list]: return [ [obj.get(key, fill_value) if is_dict_like(obj) else obj for obj in objects] for key in keys @@ -368,9 +361,9 @@ def _as_variables_or_variable(arg): def _unpack_dict_tuples( - result_vars: Mapping[Any, Tuple[Variable, ...]], num_outputs: int -) -> Tuple[Dict[Hashable, Variable], ...]: - out: Tuple[Dict[Hashable, Variable], ...] = tuple({} for _ in range(num_outputs)) + result_vars: Mapping[Any, tuple[Variable, ...]], num_outputs: int +) -> tuple[dict[Hashable, Variable], ...]: + out: tuple[dict[Hashable, Variable], ...] = tuple({} for _ in range(num_outputs)) for name, values in result_vars.items(): for value, results_dict in zip(values, out): results_dict[name] = value @@ -398,7 +391,7 @@ def apply_dict_of_variables_vfunc( def _fast_dataset( - variables: Dict[Hashable, Variable], coord_variables: Mapping[Hashable, Variable] + variables: dict[Hashable, Variable], coord_variables: Mapping[Hashable, Variable] ) -> Dataset: """Create a dataset as quickly as possible. 
@@ -528,9 +521,9 @@ def apply_groupby_func(func, *args): def unified_dim_sizes( variables: Iterable[Variable], exclude_dims: AbstractSet = frozenset() -) -> Dict[Hashable, int]: +) -> dict[Hashable, int]: - dim_sizes: Dict[Hashable, int] = {} + dim_sizes: dict[Hashable, int] = {} for var in variables: if len(set(var.dims)) < len(var.dims): @@ -556,8 +549,8 @@ def unified_dim_sizes( def broadcast_compat_data( variable: Variable, - broadcast_dims: Tuple[Hashable, ...], - core_dims: Tuple[Hashable, ...], + broadcast_dims: tuple[Hashable, ...], + core_dims: tuple[Hashable, ...], ) -> Any: data = variable.data @@ -595,7 +588,7 @@ def broadcast_compat_data( data = duck_array_ops.transpose(data, order) if new_dims != reordered_dims: - key_parts: List[Optional[slice]] = [] + key_parts: list[slice | None] = [] for dim in new_dims: if dim in set_old_dims: key_parts.append(SLICE_NONE) @@ -810,19 +803,19 @@ def apply_ufunc( func: Callable, *args: Any, input_core_dims: Sequence[Sequence] = None, - output_core_dims: Optional[Sequence[Sequence]] = ((),), + output_core_dims: Sequence[Sequence] | None = ((),), exclude_dims: AbstractSet = frozenset(), vectorize: bool = False, join: str = "exact", dataset_join: str = "exact", dataset_fill_value: object = _NO_FILL_VALUE, - keep_attrs: Union[bool, str] = None, - kwargs: Mapping = None, + keep_attrs: bool | str | None = None, + kwargs: Mapping | None = None, dask: str = "forbidden", - output_dtypes: Sequence = None, - output_sizes: Mapping[Any, int] = None, + output_dtypes: Sequence | None = None, + output_sizes: Mapping[Any, int] | None = None, meta: Any = None, - dask_gufunc_kwargs: Dict[str, Any] = None, + dask_gufunc_kwargs: dict[str, Any] | None = None, ) -> Any: """Apply a vectorized function for unlabeled arrays on xarray objects. @@ -1375,8 +1368,8 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): def cross( - a: Union[DataArray, Variable], b: Union[DataArray, Variable], *, dim: Hashable -) -> Union[DataArray, Variable]: + a: DataArray | Variable, b: DataArray | Variable, *, dim: Hashable +) -> DataArray | Variable: """ Compute the cross product of two (arrays of) vectors. @@ -1926,7 +1919,7 @@ def _calc_idxminmax( return res -def unify_chunks(*objects: T_Xarray) -> Tuple[T_Xarray, ...]: +def unify_chunks(*objects: T_Xarray) -> tuple[T_Xarray, ...]: """ Given any number of Dataset and/or DataArray objects, returns new objects with unified chunk size along all chunked dimensions. 
diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 7ead1918e1a..4621e622d42 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,18 +1,6 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Dict, - Hashable, - Iterable, - List, - Literal, - Optional, - Set, - Tuple, - Union, - overload, -) +from typing import TYPE_CHECKING, Hashable, Iterable, Literal, overload import pandas as pd @@ -35,31 +23,31 @@ @overload def concat( - objs: Iterable["Dataset"], - dim: Hashable | "DataArray" | pd.Index, - data_vars: concat_options | List[Hashable] = "all", - coords: concat_options | List[Hashable] = "different", + objs: Iterable[Dataset], + dim: Hashable | DataArray | pd.Index, + data_vars: concat_options | list[Hashable] = "all", + coords: concat_options | list[Hashable] = "different", compat: compat_options = "equals", - positions: Optional[Iterable[int]] = None, + positions: Iterable[int] | None = None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: ... @overload def concat( - objs: Iterable["DataArray"], - dim: Hashable | "DataArray" | pd.Index, - data_vars: concat_options | List[Hashable] = "all", - coords: concat_options | List[Hashable] = "different", + objs: Iterable[DataArray], + dim: Hashable | DataArray | pd.Index, + data_vars: concat_options | list[Hashable] = "all", + coords: concat_options | list[Hashable] = "different", compat: compat_options = "equals", - positions: Optional[Iterable[int]] = None, + positions: Iterable[int] | None = None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "DataArray": +) -> DataArray: ... @@ -394,14 +382,14 @@ def process_subset_opt(opt, subset): # determine dimensional coordinate names and a dict mapping name to DataArray def _parse_datasets( - datasets: Iterable["Dataset"], -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, int], Set[Hashable], Set[Hashable]]: + datasets: Iterable[Dataset], +) -> tuple[dict[Hashable, Variable], dict[Hashable, int], set[Hashable], set[Hashable]]: - dims: Set[Hashable] = set() - all_coord_names: Set[Hashable] = set() - data_vars: Set[Hashable] = set() # list of data_vars - dim_coords: Dict[Hashable, Variable] = {} # maps dim name to variable - dims_sizes: Dict[Hashable, int] = {} # shared dimension sizes to expand variables + dims: set[Hashable] = set() + all_coord_names: set[Hashable] = set() + data_vars: set[Hashable] = set() # list of data_vars + dim_coords: dict[Hashable, Variable] = {} # maps dim name to variable + dims_sizes: dict[Hashable, int] = {} # shared dimension sizes to expand variables for ds in datasets: dims_sizes.update(ds.dims) @@ -421,16 +409,16 @@ def _parse_datasets( def _dataset_concat( - datasets: List["Dataset"], - dim: Union[str, "DataArray", pd.Index], - data_vars: Union[str, List[str]], - coords: Union[str, List[str]], + datasets: list[Dataset], + dim: str | DataArray | pd.Index, + data_vars: str | list[str], + coords: str | list[str], compat: str, - positions: Optional[Iterable[int]], + positions: Iterable[int] | None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: """ Concatenate a sequence of datasets along a new or existing dimension """ @@ -477,7 +465,7 @@ def _dataset_concat( result_vars = {} if variables_to_merge: - to_merge: Dict[Hashable, List[Variable]] = { + to_merge: dict[Hashable, list[Variable]] = { var: [] for var in variables_to_merge } @@ 
-552,16 +540,16 @@ def ensure_common_dims(vars): def _dataarray_concat( - arrays: Iterable["DataArray"], - dim: Union[str, "DataArray", pd.Index], - data_vars: Union[str, List[str]], - coords: Union[str, List[str]], + arrays: Iterable[DataArray], + dim: str | DataArray | pd.Index, + data_vars: str | list[str], + coords: str | list[str], compat: str, - positions: Optional[Iterable[int]], + positions: Iterable[int] | None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "DataArray": +) -> DataArray: from .dataarray import DataArray arrays = list(arrays) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7f29d3b6320..81aaf5a50e0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6,16 +6,11 @@ TYPE_CHECKING, Any, Callable, - Dict, Hashable, Iterable, - List, Literal, Mapping, - Optional, Sequence, - Tuple, - Union, cast, ) @@ -93,7 +88,7 @@ def _infer_coords_and_dims( shape, coords, dims -) -> "Tuple[Dict[Any, Variable], Tuple[Hashable, ...]]": +) -> tuple[dict[Any, Variable], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" if ( @@ -131,7 +126,7 @@ def _infer_coords_and_dims( if not isinstance(d, str): raise TypeError(f"dimension {d} is not a string") - new_coords: Dict[Any, Variable] = {} + new_coords: dict[Any, Variable] = {} if utils.is_dict_like(coords): for k, v in coords.items(): @@ -192,10 +187,10 @@ def _check_data_shape(data, coords, dims): class _LocIndexer: __slots__ = ("data_array",) - def __init__(self, data_array: "DataArray"): + def __init__(self, data_array: DataArray): self.data_array = data_array - def __getitem__(self, key) -> "DataArray": + def __getitem__(self, key) -> DataArray: if not utils.is_dict_like(key): # expand the indexer so we can handle Ellipsis labels = indexing.expanded_indexer(key, self.data_array.ndim) @@ -341,11 +336,11 @@ class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): units: degC """ - _cache: Dict[str, Any] - _coords: Dict[Any, Variable] - _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, Index]] - _name: Optional[Hashable] + _cache: dict[str, Any] + _coords: dict[Any, Variable] + _close: Callable[[], None] | None + _indexes: dict[Hashable, Index] | None + _name: Hashable | None _variable: Variable __slots__ = ( @@ -369,12 +364,12 @@ class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): def __init__( self, data: Any = dtypes.NA, - coords: Union[Sequence[Tuple], Mapping[Any, Any], None] = None, - dims: Union[Hashable, Sequence[Hashable], None] = None, + coords: Sequence[tuple] | Mapping[Any, Any] | None = None, + dims: Hashable | Sequence[Hashable] | None = None, name: Hashable = None, attrs: Mapping = None, # internal parameters - indexes: Dict[Hashable, pd.Index] = None, + indexes: dict[Hashable, pd.Index] = None, fastpath: bool = False, ): if fastpath: @@ -425,7 +420,7 @@ def _replace( self: T_DataArray, variable: Variable = None, coords=None, - name: Union[Hashable, None, Default] = _default, + name: Hashable | None | Default = _default, indexes=None, ) -> T_DataArray: if variable is None: @@ -437,8 +432,8 @@ def _replace( return type(self)(variable, coords, name=name, fastpath=True, indexes=indexes) def _replace_maybe_drop_dims( - self, variable: Variable, name: Union[Hashable, None, Default] = _default - ) -> "DataArray": + self, variable: Variable, name: Hashable | None | Default = _default + ) -> DataArray: if variable.dims == self.dims and variable.shape == self.shape: coords = 
self._coords.copy() indexes = self._indexes @@ -464,7 +459,7 @@ def _replace_maybe_drop_dims( ) return self._replace(variable, coords, name, indexes=indexes) - def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> "DataArray": + def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> DataArray: if not len(indexes): return self coords = self._coords.copy() @@ -473,7 +468,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> "DataArray": obj = self._replace(coords=coords) # switch from dimension to level names, if necessary - dim_names: Dict[Any, str] = {} + dim_names: dict[Any, str] = {} for dim, idx in indexes.items(): pd_idx = idx.to_pandas_index() if not isinstance(idx, pd.MultiIndex) and pd_idx.name != dim: @@ -486,8 +481,8 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Union[Hashable, None, Default] = _default - ) -> "DataArray": + self, dataset: Dataset, name: Hashable | None | Default = _default + ) -> DataArray: variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables indexes = dataset._indexes @@ -580,12 +575,12 @@ def to_dataset( return result @property - def name(self) -> Optional[Hashable]: + def name(self) -> Hashable | None: """The name of this array.""" return self._name @name.setter - def name(self, value: Optional[Hashable]) -> None: + def name(self, value: Hashable | None) -> None: self._name = value @property @@ -598,7 +593,7 @@ def dtype(self) -> np.dtype: return self.variable.dtype @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> tuple[int, ...]: return self.variable.shape @property @@ -687,7 +682,7 @@ def to_index(self) -> pd.Index: return self.variable.to_index() @property - def dims(self) -> Tuple[Hashable, ...]: + def dims(self) -> tuple[Hashable, ...]: """Tuple of dimension names associated with this array. Note that the type of this property is inconsistent with @@ -710,11 +705,11 @@ def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: return dict(zip(self.dims, key)) @property - def _level_coords(self) -> Dict[Hashable, Hashable]: + def _level_coords(self) -> dict[Hashable, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding coordinate name. 
""" - level_coords: Dict[Hashable, Hashable] = {} + level_coords: dict[Hashable, Hashable] = {} for cname, var in self._coords.items(): if var.ndim == 1 and isinstance(var, IndexVariable): @@ -737,7 +732,7 @@ def _getitem_coord(self, key): return self._replace_maybe_drop_dims(var, name=key) - def __getitem__(self, key: Any) -> "DataArray": + def __getitem__(self, key: Any) -> DataArray: if isinstance(key, str): return self._getitem_coord(key) else: @@ -790,7 +785,7 @@ def loc(self) -> _LocIndexer: @property # Key type needs to be `Any` because of mypy#4167 - def attrs(self) -> Dict[Any, Any]: + def attrs(self) -> dict[Any, Any]: """Dictionary storing arbitrary metadata with this array.""" return self.variable.attrs @@ -800,7 +795,7 @@ def attrs(self, value: Mapping[Any, Any]) -> None: self.variable.attrs = value # type: ignore[assignment] @property - def encoding(self) -> Dict[Hashable, Any]: + def encoding(self) -> dict[Hashable, Any]: """Dictionary of format-specific settings for how this array should be serialized.""" return self.variable.encoding @@ -837,9 +832,9 @@ def coords(self) -> DataArrayCoordinates: def reset_coords( self, - names: Union[Iterable[Hashable], Hashable, None] = None, + names: Iterable[Hashable] | Hashable | None = None, drop: bool = False, - ) -> Union[None, "DataArray", Dataset]: + ) -> None | DataArray | Dataset: """Given names of coordinates, reset them to become variables. Parameters @@ -904,7 +899,7 @@ def _dask_finalize(results, name, func, *args, **kwargs): coords = ds._variables return DataArray(variable, coords, name=name, fastpath=True) - def load(self, **kwargs) -> "DataArray": + def load(self, **kwargs) -> DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return this array. @@ -928,7 +923,7 @@ def load(self, **kwargs) -> "DataArray": self._coords = new._coords return self - def compute(self, **kwargs) -> "DataArray": + def compute(self, **kwargs) -> DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return a new array. The original is left unaltered. @@ -950,7 +945,7 @@ def compute(self, **kwargs) -> "DataArray": new = self.copy(deep=False) return new.load(**kwargs) - def persist(self, **kwargs) -> "DataArray": + def persist(self, **kwargs) -> DataArray: """Trigger computation in constituent dask arrays This keeps them as dask arrays but encourages them to keep data in @@ -1045,10 +1040,10 @@ def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()} return self._replace(variable, coords, indexes=indexes) - def __copy__(self) -> "DataArray": + def __copy__(self) -> DataArray: return self.copy(deep=False) - def __deepcopy__(self, memo=None) -> "DataArray": + def __deepcopy__(self, memo=None) -> DataArray: # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) @@ -1058,7 +1053,7 @@ def __deepcopy__(self, memo=None) -> "DataArray": __hash__ = None # type: ignore[assignment] @property - def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: + def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ Tuple of block lengths for this dataarray's data, in order of dimensions, or None if the underlying data is not a dask array. 
@@ -1072,7 +1067,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: return self.variable.chunks @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataarray's data, or None if the underlying data is not a dask array. @@ -1092,17 +1087,17 @@ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: def chunk( self, - chunks: Union[ - int, - Literal["auto"], - Tuple[int, ...], - Tuple[Tuple[int, ...], ...], - Mapping[Any, Union[None, int, Tuple[int, ...]]], - ] = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) + chunks: ( + int + | Literal["auto"] + | tuple[int, ...] + | tuple[tuple[int, ...], ...] + | Mapping[Any, None | int | tuple[int, ...]] + ) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", token: str = None, lock: bool = False, - ) -> "DataArray": + ) -> DataArray: """Coerce this array's data into a dask arrays with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1144,7 +1139,7 @@ def isel( drop: bool = False, missing_dims: str = "raise", **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by integer indexing along the specified dimension(s). @@ -1228,7 +1223,7 @@ def sel( tolerance=None, drop: bool = False, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by selecting index labels along the specified dimension(s). @@ -1341,9 +1336,9 @@ def sel( def head( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by the the first `n` values along the specified dimension(s). Default `n` = 5 @@ -1358,9 +1353,9 @@ def head( def tail( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by the the last `n` values along the specified dimension(s). Default `n` = 5 @@ -1375,9 +1370,9 @@ def tail( def thin( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by each `n` value along the specified dimension(s). @@ -1391,8 +1386,8 @@ def thin( return self._from_temp_dataset(ds) def broadcast_like( - self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None - ) -> "DataArray": + self, other: DataArray | Dataset, exclude: Iterable[Hashable] | None = None + ) -> DataArray: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -1466,12 +1461,12 @@ def broadcast_like( def reindex_like( self, - other: Union["DataArray", Dataset], - method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + other: DataArray | Dataset, + method: str | None = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, - ) -> "DataArray": + ) -> DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. 
The default fill value is NaN. @@ -1534,11 +1529,11 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -1634,7 +1629,7 @@ def interp( assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, **coords_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Multidimensional interpolation of variables. Parameters @@ -1759,11 +1754,11 @@ def interp( def interp_like( self, - other: Union["DataArray", Dataset], + other: DataArray | Dataset, method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, - ) -> "DataArray": + ) -> DataArray: """Interpolate this object onto the coordinates of another object, filling out of range values with NaN. @@ -1815,9 +1810,9 @@ def interp_like( def rename( self, - new_name_or_name_dict: Union[Hashable, Mapping[Any, Hashable]] = None, + new_name_or_name_dict: Hashable | Mapping[Any, Hashable] = None, **names: Hashable, - ) -> "DataArray": + ) -> DataArray: """Returns a new DataArray with renamed coordinates or a new name. Parameters @@ -1854,7 +1849,7 @@ def rename( def swap_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs - ) -> "DataArray": + ) -> DataArray: """Returns a new DataArray with swapped dimensions. Parameters @@ -1911,10 +1906,10 @@ def swap_dims( def expand_dims( self, - dim: Union[None, Hashable, Sequence[Hashable], Mapping[Any, Any]] = None, + dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, axis=None, **dim_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new object with an additional axis (or axes) inserted at the corresponding position in the array shape. The new object is a view into the underlying array, not a copy. @@ -1963,10 +1958,10 @@ def expand_dims( def set_index( self, - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, append: bool = False, - **indexes_kwargs: Union[Hashable, Sequence[Hashable]], - ) -> "DataArray": + **indexes_kwargs: Hashable | Sequence[Hashable], + ) -> DataArray: """Set DataArray (multi-)indexes using one or more existing coordinates. @@ -2020,9 +2015,9 @@ def set_index( def reset_index( self, - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], drop: bool = False, - ) -> "DataArray": + ) -> DataArray: """Reset the specified index(es) or multi-index level(s). Parameters @@ -2053,7 +2048,7 @@ def reorder_levels( self, dim_order: Mapping[Any, Sequence[int]] = None, **dim_order_kwargs: Sequence[int], - ) -> "DataArray": + ) -> DataArray: """Rearrange index levels using input order. Parameters @@ -2088,7 +2083,7 @@ def stack( self, dimensions: Mapping[Any, Sequence[Hashable]] = None, **dimensions_kwargs: Sequence[Hashable], - ) -> "DataArray": + ) -> DataArray: """ Stack any number of existing dimensions into a single new dimension. 
@@ -2144,10 +2139,10 @@ def stack( def unstack( self, - dim: Union[Hashable, Sequence[Hashable], None] = None, + dim: Hashable | Sequence[Hashable] | None = None, fill_value: Any = dtypes.NA, sparse: bool = False, - ) -> "DataArray": + ) -> DataArray: """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -2278,7 +2273,7 @@ def transpose( *dims: Hashable, transpose_coords: bool = True, missing_dims: str = "raise", - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray object with transposed dimensions. Parameters @@ -2315,7 +2310,7 @@ def transpose( dims = tuple(utils.infix_dims(dims, self.dims, missing_dims)) variable = self.variable.transpose(*dims) if transpose_coords: - coords: Dict[Hashable, Variable] = {} + coords: dict[Hashable, Variable] = {} for name, coord in self.coords.items(): coord_dims = tuple(dim for dim in dims if dim in coord.dims) coords[name] = coord.variable.transpose(*coord_dims) @@ -2324,12 +2319,12 @@ def transpose( return self._replace(variable) @property - def T(self) -> "DataArray": + def T(self) -> DataArray: return self.transpose() def drop_vars( - self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "DataArray": + self, names: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> DataArray: """Returns an array with dropped variables. Parameters @@ -2356,7 +2351,7 @@ def drop( *, errors: str = "raise", **labels_kwargs, - ) -> "DataArray": + ) -> DataArray: """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged @@ -2375,7 +2370,7 @@ def drop_sel( *, errors: str = "raise", **labels_kwargs, - ) -> "DataArray": + ) -> DataArray: """Drop index labels from this DataArray. Parameters @@ -2422,9 +2417,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs): dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs) return self._from_temp_dataset(dataset) - def dropna( - self, dim: Hashable, how: str = "any", thresh: int = None - ) -> "DataArray": + def dropna(self, dim: Hashable, how: str = "any", thresh: int = None) -> DataArray: """Returns a new array with dropped labels for missing values along the provided dimension. @@ -2446,7 +2439,7 @@ def dropna( ds = self._to_temp_dataset().dropna(dim, how=how, thresh=thresh) return self._from_temp_dataset(ds) - def fillna(self, value: Any) -> "DataArray": + def fillna(self, value: Any) -> DataArray: """Fill missing values in this object. This operation follows the normal broadcasting and alignment rules that @@ -2478,13 +2471,13 @@ def interpolate_na( dim: Hashable = None, method: str = "linear", limit: int = None, - use_coordinate: Union[bool, str] = True, - max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta - ] = None, + use_coordinate: bool | str = True, + max_gap: ( + int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta + ) = None, keep_attrs: bool = None, **kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Fill in NaNs by interpolating according to different methods. 
Parameters @@ -2589,7 +2582,7 @@ def interpolate_na( **kwargs, ) - def ffill(self, dim: Hashable, limit: int = None) -> "DataArray": + def ffill(self, dim: Hashable, limit: int = None) -> DataArray: """Fill NaN values by propogating values forward *Requires bottleneck.* @@ -2614,7 +2607,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> "DataArray": return ffill(self, dim, limit=limit) - def bfill(self, dim: Hashable, limit: int = None) -> "DataArray": + def bfill(self, dim: Hashable, limit: int = None) -> DataArray: """Fill NaN values by propogating values backward *Requires bottleneck.* @@ -2639,7 +2632,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "DataArray": return bfill(self, dim, limit=limit) - def combine_first(self, other: "DataArray") -> "DataArray": + def combine_first(self, other: DataArray) -> DataArray: """Combine two DataArray objects, with union of coordinates. This operation follows the normal broadcasting and alignment rules of @@ -2660,12 +2653,12 @@ def combine_first(self, other: "DataArray") -> "DataArray": def reduce( self, func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, + dim: None | Hashable | Sequence[Hashable] = None, + axis: None | int | Sequence[int] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Reduce this array by applying `func` along some dimension(s). Parameters @@ -2702,7 +2695,7 @@ def reduce( var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, **kwargs) return self._replace_maybe_drop_dims(var) - def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: + def to_pandas(self) -> DataArray | pd.Series | pd.DataFrame: """Convert this array into a pandas object with the same shape. The type of the returned object depends on the number of DataArray @@ -2730,7 +2723,7 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: return constructor(self.values, *indexes) def to_dataframe( - self, name: Hashable = None, dim_order: List[Hashable] = None + self, name: Hashable = None, dim_order: list[Hashable] = None ) -> pd.DataFrame: """Convert this array and its coordinates into a tidy pandas.DataFrame. @@ -2819,7 +2812,7 @@ def to_masked_array(self, copy: bool = True) -> np.ma.MaskedArray: isnull = pd.isnull(values) return np.ma.MaskedArray(data=values, mask=isnull, copy=copy) - def to_netcdf(self, *args, **kwargs) -> Union[bytes, "Delayed", None]: + def to_netcdf(self, *args, **kwargs) -> bytes | Delayed | None: """Write DataArray contents to a netCDF file. All parameters are passed directly to :py:meth:`xarray.Dataset.to_netcdf`. @@ -2878,7 +2871,7 @@ def to_dict(self, data: bool = True) -> dict: return d @classmethod - def from_dict(cls, d: dict) -> "DataArray": + def from_dict(cls, d: dict) -> DataArray: """ Convert a dictionary into an xarray.DataArray @@ -2934,7 +2927,7 @@ def from_dict(cls, d: dict) -> "DataArray": return obj @classmethod - def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": + def from_series(cls, series: pd.Series, sparse: bool = False) -> DataArray: """Convert a pandas.Series into an xarray.DataArray. 
If the series's index is a MultiIndex, it will be expanded into a @@ -2956,33 +2949,33 @@ def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": result.name = series.name return result - def to_cdms2(self) -> "cdms2_Variable": + def to_cdms2(self) -> cdms2_Variable: """Convert this array into a cdms2.Variable""" from ..convert import to_cdms2 return to_cdms2(self) @classmethod - def from_cdms2(cls, variable: "cdms2_Variable") -> "DataArray": + def from_cdms2(cls, variable: cdms2_Variable) -> DataArray: """Convert a cdms2.Variable into an xarray.DataArray""" from ..convert import from_cdms2 return from_cdms2(variable) - def to_iris(self) -> "iris_Cube": + def to_iris(self) -> iris_Cube: """Convert this array into a iris.cube.Cube""" from ..convert import to_iris return to_iris(self) @classmethod - def from_iris(cls, cube: "iris_Cube") -> "DataArray": + def from_iris(cls, cube: iris_Cube) -> DataArray: """Convert a iris.cube.Cube into an xarray.DataArray""" from ..convert import from_iris return from_iris(cube) - def _all_compat(self, other: "DataArray", compat_str: str) -> bool: + def _all_compat(self, other: DataArray, compat_str: str) -> bool: """Helper function for equals, broadcast_equals, and identical""" def compat(x, y): @@ -2992,7 +2985,7 @@ def compat(x, y): self, other ) - def broadcast_equals(self, other: "DataArray") -> bool: + def broadcast_equals(self, other: DataArray) -> bool: """Two DataArrays are broadcast equal if they are equal after broadcasting them against each other such that they have the same dimensions. @@ -3007,7 +3000,7 @@ def broadcast_equals(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def equals(self, other: "DataArray") -> bool: + def equals(self, other: DataArray) -> bool: """True if two DataArrays have the same dimensions, coordinates and values; otherwise False. @@ -3027,7 +3020,7 @@ def equals(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def identical(self, other: "DataArray") -> bool: + def identical(self, other: DataArray) -> bool: """Like equals, but also checks the array name and attributes, and attributes on all coordinates. 
@@ -3041,7 +3034,7 @@ def identical(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def _result_name(self, other: Any = None) -> Optional[Hashable]: + def _result_name(self, other: Any = None) -> Hashable | None: # use the same naming heuristics as pandas: # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356 other_name = getattr(other, "name", _default) @@ -3050,7 +3043,7 @@ def _result_name(self, other: Any = None) -> Optional[Hashable]: else: return None - def __array_wrap__(self, obj, context=None) -> "DataArray": + def __array_wrap__(self, obj, context=None) -> DataArray: new_var = self.variable.__array_wrap__(obj, context) return self._replace(new_var) @@ -3124,7 +3117,7 @@ def _inplace_binary_op(self, other, f: Callable): ) from exc return self - def _copy_attrs_from(self, other: Union["DataArray", Dataset, Variable]) -> None: + def _copy_attrs_from(self, other: DataArray | Dataset | Variable) -> None: self.attrs = other.attrs plot = utils.UncachedAccessor(_PlotMethods) @@ -3162,7 +3155,7 @@ def _title_for_slice(self, truncate: int = 50) -> str: return title - def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArray": + def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> DataArray: """Calculate the n-th order discrete difference along given axis. Parameters @@ -3213,7 +3206,7 @@ def shift( shifts: Mapping[Any, int] = None, fill_value: Any = dtypes.NA, **shifts_kwargs: int, - ) -> "DataArray": + ) -> DataArray: """Shift this DataArray by an offset along one or more dimensions. Only the data is moved; coordinates stay in place. This is consistent @@ -3263,7 +3256,7 @@ def roll( shifts: Mapping[Hashable, int] = None, roll_coords: bool = False, **shifts_kwargs: int, - ) -> "DataArray": + ) -> DataArray: """Roll this array by an offset along one or more dimensions. Unlike shift, roll treats the given dimensions as periodic, so will not @@ -3308,16 +3301,16 @@ def roll( return self._from_temp_dataset(ds) @property - def real(self) -> "DataArray": + def real(self) -> DataArray: return self._replace(self.variable.real) @property - def imag(self) -> "DataArray": + def imag(self) -> DataArray: return self._replace(self.variable.imag) def dot( - self, other: "DataArray", dims: Union[Hashable, Sequence[Hashable], None] = None - ) -> "DataArray": + self, other: DataArray, dims: Hashable | Sequence[Hashable] | None = None + ) -> DataArray: """Perform dot product of two DataArrays along their shared dims. Equivalent to taking taking tensordot over all shared dims. @@ -3369,9 +3362,9 @@ def dot( def sortby( self, - variables: Union[Hashable, "DataArray", Sequence[Union[Hashable, "DataArray"]]], + variables: Hashable | DataArray | Sequence[Hashable | DataArray], ascending: bool = True, - ) -> "DataArray": + ) -> DataArray: """Sort object by labels or values (along an axis). Sorts the dataarray, either along specified dimensions, @@ -3434,11 +3427,11 @@ def sortby( def quantile( self, q: Any, - dim: Union[Hashable, Sequence[Hashable], None] = None, + dim: Hashable | Sequence[Hashable] | None = None, interpolation: str = "linear", keep_attrs: bool = None, skipna: bool = True, - ) -> "DataArray": + ) -> DataArray: """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -3525,7 +3518,7 @@ def quantile( def rank( self, dim: Hashable, pct: bool = False, keep_attrs: bool = None - ) -> "DataArray": + ) -> DataArray: """Ranks the data. 
Equal values are assigned a rank that is the average of the ranks that @@ -3566,7 +3559,7 @@ def rank( def differentiate( self, coord: Hashable, edge_order: int = 1, datetime_unit: str = None - ) -> "DataArray": + ) -> DataArray: """ Differentiate the array with the second order accurate central differences. @@ -3625,9 +3618,9 @@ def differentiate( def integrate( self, - coord: Union[Hashable, Sequence[Hashable]] = None, + coord: Hashable | Sequence[Hashable] = None, datetime_unit: str = None, - ) -> "DataArray": + ) -> DataArray: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -3679,9 +3672,9 @@ def integrate( def cumulative_integrate( self, - coord: Union[Hashable, Sequence[Hashable]] = None, + coord: Hashable | Sequence[Hashable] = None, datetime_unit: str = None, - ) -> "DataArray": + ) -> DataArray: """Integrate cumulatively along the given coordinate using the trapezoidal rule. .. note:: @@ -3739,7 +3732,7 @@ def cumulative_integrate( ds = self._to_temp_dataset().cumulative_integrate(coord, datetime_unit) return self._from_temp_dataset(ds) - def unify_chunks(self) -> "DataArray": + def unify_chunks(self) -> DataArray: """Unify chunk size along all chunked dimensions of this DataArray. Returns @@ -3757,8 +3750,8 @@ def map_blocks( self, func: Callable[..., T_Xarray], args: Sequence[Any] = (), - kwargs: Mapping[str, Any] = None, - template: Union["DataArray", "Dataset"] = None, + kwargs: Mapping[str, Any] | None = None, + template: DataArray | Dataset | None = None, ) -> T_Xarray: """ Apply a function to each block of this DataArray. @@ -3861,9 +3854,9 @@ def polyfit( self, dim: Hashable, deg: int, - skipna: bool = None, - rcond: float = None, - w: Union[Hashable, Any] = None, + skipna: bool | None = None, + rcond: float | None = None, + w: Hashable | Any | None = None, full: bool = False, cov: bool = False, ): @@ -3924,16 +3917,18 @@ def polyfit( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] | None = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - reflect_type: str = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]]) + | None = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, + reflect_type: str | None = None, **pad_width_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Pad this array along one or more dimensions. .. warning:: @@ -4092,7 +4087,7 @@ def idxmin( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "DataArray": + ) -> DataArray: """Return the coordinate label of the minimum value along a dimension. Returns a new `DataArray` named after the dimension with the values of @@ -4188,7 +4183,7 @@ def idxmax( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "DataArray": + ) -> DataArray: """Return the coordinate label of the maximum value along a dimension. 
Returns a new `DataArray` named after the dimension with the values of @@ -4280,11 +4275,11 @@ def idxmax( def argmin( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the minimum of the DataArray over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of DataArrays, @@ -4383,11 +4378,11 @@ def argmin( def argmax( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the maximum of the DataArray over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of DataArrays, @@ -4491,7 +4486,7 @@ def query( engine: str = None, missing_dims: str = "raise", **queries_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new data array indexed along the specified dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the values in the array. @@ -4561,14 +4556,14 @@ def query( def curvefit( self, - coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], + coords: str | DataArray | Iterable[str | DataArray], func: Callable[..., Any], - reduce_dims: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Hashable | Iterable[Hashable] = None, skipna: bool = True, - p0: Dict[str, Any] = None, - bounds: Dict[str, Any] = None, + p0: dict[str, Any] = None, + bounds: dict[str, Any] = None, param_names: Sequence[str] = None, - kwargs: Dict[str, Any] = None, + kwargs: dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. @@ -4640,10 +4635,7 @@ def curvefit( def drop_duplicates( self, dim: Hashable, - keep: Union[ - str, - bool, - ] = "first", + keep: (str | bool) = "first", ): """Returns a new DataArray with duplicate dimension values removed. @@ -4669,10 +4661,10 @@ def convert_calendar( self, calendar: str, dim: str = "time", - align_on: Optional[str] = None, - missing: Optional[Any] = None, - use_cftime: Optional[bool] = None, - ) -> "DataArray": + align_on: str | None = None, + missing: Any | None = None, + use_cftime: bool | None = None, + ) -> DataArray: """Convert the DataArray to another calendar. Only converts the individual timestamps, does not modify any data except @@ -4790,9 +4782,9 @@ def convert_calendar( def interp_calendar( self, - target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + target: pd.DatetimeIndex | CFTimeIndex | DataArray, dim: str = "time", - ) -> "DataArray": + ) -> DataArray: """Interpolates the DataArray to another calendar based on decimal year measure. 
Each timestamp in `source` and `target` are first converted to their decimal diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4e8001ca389..26ef95f64f9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import copy import datetime import inspect @@ -14,18 +16,12 @@ Callable, Collection, DefaultDict, - Dict, Hashable, Iterable, Iterator, - List, Mapping, MutableMapping, - Optional, Sequence, - Set, - Tuple, - Union, cast, overload, ) @@ -144,7 +140,7 @@ def _get_virtual_variable( variables, key: Hashable, level_vars: Mapping = None, dim_sizes: Mapping = None -) -> Tuple[Hashable, Hashable, Variable]: +) -> tuple[Hashable, Hashable, Variable]: """Get a virtual variable (e.g., 'time.year' or a MultiIndex level) from a dict of xarray.Variable objects (if possible) """ @@ -162,7 +158,7 @@ def _get_virtual_variable( raise KeyError(key) split_key = key.split(".", 1) - var_name: Optional[str] + var_name: str | None if len(split_key) == 2: ref_name, var_name = split_key elif len(split_key) == 1: @@ -190,13 +186,13 @@ def _get_virtual_variable( return ref_name, var_name, virtual_var -def calculate_dimensions(variables: Mapping[Any, Variable]) -> Dict[Hashable, int]: +def calculate_dimensions(variables: Mapping[Any, Variable]) -> dict[Hashable, int]: """Calculate the dimensions corresponding to a set of variables. Returns dictionary mapping from dimension names to sizes. Raises ValueError if any of the dimension sizes conflict. """ - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} last_used = {} scalar_vars = {k for k, v in variables.items() if not v.dims} for k, var in variables.items(): @@ -217,28 +213,28 @@ def calculate_dimensions(variables: Mapping[Any, Variable]) -> Dict[Hashable, in def merge_indexes( - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]], + indexes: Mapping[Any, Hashable | Sequence[Hashable]], variables: Mapping[Any, Variable], - coord_names: Set[Hashable], + coord_names: set[Hashable], append: bool = False, -) -> Tuple[Dict[Hashable, Variable], Set[Hashable]]: +) -> tuple[dict[Hashable, Variable], set[Hashable]]: """Merge variables into multi-indexes. Not public API. Used in Dataset and DataArray set_index methods. """ - vars_to_replace: Dict[Hashable, Variable] = {} - vars_to_remove: List[Hashable] = [] - dims_to_replace: Dict[Hashable, Hashable] = {} + vars_to_replace: dict[Hashable, Variable] = {} + vars_to_remove: list[Hashable] = [] + dims_to_replace: dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): if isinstance(var_names, str) or not isinstance(var_names, Sequence): var_names = [var_names] - names: List[Hashable] = [] - codes: List[List[int]] = [] - levels: List[List[int]] = [] + names: list[Hashable] = [] + codes: list[list[int]] = [] + levels: list[list[int]] = [] current_index_variable = variables.get(dim) for n in var_names: @@ -301,12 +297,12 @@ def merge_indexes( def split_indexes( - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], variables: Mapping[Any, Variable], - coord_names: Set[Hashable], + coord_names: set[Hashable], level_coords: Mapping[Any, Hashable], drop: bool = False, -) -> Tuple[Dict[Hashable, Variable], Set[Hashable]]: +) -> tuple[dict[Hashable, Variable], set[Hashable]]: """Extract (multi-)indexes (levels) as variables. Not public API. 
Used in Dataset and DataArray reset_index @@ -315,7 +311,7 @@ def split_indexes( if isinstance(dims_or_levels, str) or not isinstance(dims_or_levels, Sequence): dims_or_levels = [dims_or_levels] - dim_levels: DefaultDict[Any, List[Hashable]] = defaultdict(list) + dim_levels: DefaultDict[Any, list[Hashable]] = defaultdict(list) dims = [] for k in dims_or_levels: if k in level_coords: @@ -324,7 +320,7 @@ def split_indexes( dims.append(k) vars_to_replace = {} - vars_to_create: Dict[Hashable, Variable] = {} + vars_to_create: dict[Hashable, Variable] = {} vars_to_remove = [] for d in dims: @@ -447,7 +443,7 @@ def _maybe_chunk( return var -def as_dataset(obj: Any) -> "Dataset": +def as_dataset(obj: Any) -> Dataset: """Cast the given object to a Dataset. Handles Datasets, DataArrays and dictionaries of variables. A new Dataset @@ -520,7 +516,7 @@ def _initialize_feasible(lb, ub): class DataVariables(Mapping[Any, "DataArray"]): __slots__ = ("_dataset",) - def __init__(self, dataset: "Dataset"): + def __init__(self, dataset: Dataset): self._dataset = dataset def __iter__(self) -> Iterator[Hashable]: @@ -536,7 +532,7 @@ def __len__(self) -> int: def __contains__(self, key: Hashable) -> bool: return key in self._dataset._variables and key not in self._dataset._coord_names - def __getitem__(self, key: Hashable) -> "DataArray": + def __getitem__(self, key: Hashable) -> DataArray: if key not in self._dataset._coord_names: return cast("DataArray", self._dataset[key]) raise KeyError(key) @@ -561,10 +557,10 @@ def _ipython_key_completions_(self): class _LocIndexer: __slots__ = ("dataset",) - def __init__(self, dataset: "Dataset"): + def __init__(self, dataset: Dataset): self.dataset = dataset - def __getitem__(self, key: Mapping[Any, Any]) -> "Dataset": + def __getitem__(self, key: Mapping[Any, Any]) -> Dataset: if not utils.is_dict_like(key): raise TypeError("can only lookup dictionaries from Dataset.loc") return self.dataset.sel(key) @@ -704,14 +700,14 @@ class Dataset(DataWithCoords, DatasetArithmetic, Mapping): description: Weather related data. 
""" - _attrs: Optional[Dict[Hashable, Any]] - _cache: Dict[str, Any] - _coord_names: Set[Hashable] - _dims: Dict[Hashable, int] - _encoding: Optional[Dict[Hashable, Any]] - _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, Index]] - _variables: Dict[Hashable, Variable] + _attrs: dict[Hashable, Any] | None + _cache: dict[str, Any] + _coord_names: set[Hashable] + _dims: dict[Hashable, int] + _encoding: dict[Hashable, Any] | None + _close: Callable[[], None] | None + _indexes: dict[Hashable, Index] | None + _variables: dict[Hashable, Variable] __slots__ = ( "_attrs", @@ -768,7 +764,7 @@ def __init__( self._indexes = indexes @classmethod - def load_store(cls, store, decoder=None) -> "Dataset": + def load_store(cls, store, decoder=None) -> Dataset: """Create a new dataset from the contents of a backends.*DataStore object """ @@ -791,7 +787,7 @@ def variables(self) -> Mapping[Hashable, Variable]: return Frozen(self._variables) @property - def attrs(self) -> Dict[Hashable, Any]: + def attrs(self) -> dict[Hashable, Any]: """Dictionary of global attributes on this dataset""" if self._attrs is None: self._attrs = {} @@ -802,7 +798,7 @@ def attrs(self, value: Mapping[Any, Any]) -> None: self._attrs = dict(value) @property - def encoding(self) -> Dict: + def encoding(self) -> dict: """Dictionary of global encoding attributes on this dataset""" if self._encoding is None: self._encoding = {} @@ -839,7 +835,7 @@ def sizes(self) -> Mapping[Hashable, int]: """ return self.dims - def load(self, **kwargs) -> "Dataset": + def load(self, **kwargs) -> Dataset: """Manually trigger loading and/or computation of this dataset's data from disk or a remote source into memory and return this dataset. Unlike compute, the original dataset is modified and returned. @@ -913,11 +909,11 @@ def __dask_layers__(self): import dask return sum( - [ + ( v.__dask_layers__() for v in self.variables.values() if dask.is_dask_collection(v) - ], + ), (), ) @@ -939,7 +935,7 @@ def __dask_postcompute__(self): def __dask_postpersist__(self): return self._dask_postpersist, () - def _dask_postcompute(self, results: "Iterable[Variable]") -> "Dataset": + def _dask_postcompute(self, results: Iterable[Variable]) -> Dataset: import dask variables = {} @@ -963,7 +959,7 @@ def _dask_postcompute(self, results: "Iterable[Variable]") -> "Dataset": def _dask_postpersist( self, dsk: Mapping, *, rename: Mapping[str, str] = None - ) -> "Dataset": + ) -> Dataset: from dask import is_dask_collection from dask.highlevelgraph import HighLevelGraph from dask.optimization import cull @@ -1012,7 +1008,7 @@ def _dask_postpersist( self._close, ) - def compute(self, **kwargs) -> "Dataset": + def compute(self, **kwargs) -> Dataset: """Manually trigger loading and/or computation of this dataset's data from disk or a remote source into memory and return a new dataset. Unlike load, the original dataset is left unaltered. 
@@ -1034,7 +1030,7 @@ def compute(self, **kwargs) -> "Dataset": new = self.copy(deep=False) return new.load(**kwargs) - def _persist_inplace(self, **kwargs) -> "Dataset": + def _persist_inplace(self, **kwargs) -> Dataset: """Persist all Dask arrays in memory""" # access .data to coerce everything to numpy or dask arrays lazy_data = { @@ -1051,7 +1047,7 @@ def _persist_inplace(self, **kwargs) -> "Dataset": return self - def persist(self, **kwargs) -> "Dataset": + def persist(self, **kwargs) -> Dataset: """Trigger computation, keeping data as dask arrays This operation can be used to trigger computation on underlying dask @@ -1075,14 +1071,14 @@ def persist(self, **kwargs) -> "Dataset": @classmethod def _construct_direct( cls, - variables: Dict[Any, Variable], - coord_names: Set[Hashable], - dims: Dict[Any, int] = None, - attrs: Dict = None, - indexes: Dict[Any, Index] = None, - encoding: Dict = None, + variables: dict[Any, Variable], + coord_names: set[Hashable], + dims: dict[Any, int] = None, + attrs: dict = None, + indexes: dict[Any, Index] = None, + encoding: dict = None, close: Callable[[], None] = None, - ) -> "Dataset": + ) -> Dataset: """Shortcut around __init__ for internal use when we want to skip costly validation """ @@ -1100,14 +1096,14 @@ def _construct_direct( def _replace( self, - variables: Dict[Hashable, Variable] = None, - coord_names: Set[Hashable] = None, - dims: Dict[Any, int] = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, Index], None, Default] = _default, - encoding: Union[dict, None, Default] = _default, + variables: dict[Hashable, Variable] = None, + coord_names: set[Hashable] = None, + dims: dict[Any, int] = None, + attrs: dict[Hashable, Any] | None | Default = _default, + indexes: dict[Hashable, Index] | None | Default = _default, + encoding: dict | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Fastpath constructor for internal use. Returns an object with optionally with replaced attributes. @@ -1150,12 +1146,12 @@ def _replace( def _replace_with_new_dims( self, - variables: Dict[Hashable, Variable], + variables: dict[Hashable, Variable], coord_names: set = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, Index], None, Default] = _default, + attrs: dict[Hashable, Any] | None | Default = _default, + indexes: dict[Hashable, Index] | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Replace variables with recalculated dimensions.""" dims = calculate_dimensions(variables) return self._replace( @@ -1164,12 +1160,12 @@ def _replace_with_new_dims( def _replace_vars_and_dims( self, - variables: Dict[Hashable, Variable], + variables: dict[Hashable, Variable], coord_names: set = None, - dims: Dict[Hashable, int] = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, + dims: dict[Hashable, int] = None, + attrs: dict[Hashable, Any] | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Deprecated version of _replace_with_new_dims(). 
Unlike _replace_with_new_dims(), this method always recalculates @@ -1181,7 +1177,7 @@ def _replace_vars_and_dims( variables, coord_names, dims, attrs, indexes=None, inplace=inplace ) - def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": + def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> Dataset: if not indexes: return self @@ -1193,7 +1189,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = self._replace(variables, indexes=new_indexes) # switch from dimension to level names, if necessary - dim_names: Dict[Hashable, str] = {} + dim_names: dict[Hashable, str] = {} for dim, idx in indexes.items(): pd_idx = idx.to_pandas_index() if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim: @@ -1202,7 +1198,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = obj.rename(dim_names) return obj - def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": + def copy(self, deep: bool = False, data: Mapping = None) -> Dataset: """Returns a copy of this dataset. If `deep=True`, a deep copy is made of each of the component variables. @@ -1327,7 +1323,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": return self._replace(variables, attrs=attrs) - def as_numpy(self: "Dataset") -> "Dataset": + def as_numpy(self: Dataset) -> Dataset: """ Coerces wrapped data and coordinates into numpy arrays, returning a Dataset. @@ -1340,11 +1336,11 @@ def as_numpy(self: "Dataset") -> "Dataset": return self._replace(variables=numpy_variables) @property - def _level_coords(self) -> Dict[str, Hashable]: + def _level_coords(self) -> dict[str, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding coordinate name. """ - level_coords: Dict[str, Hashable] = {} + level_coords: dict[str, Hashable] = {} for name, index in self.xindexes.items(): # TODO: benbovy - flexible indexes: update when MultIndex has its own xarray class. pd_index = index.to_pandas_index() @@ -1354,13 +1350,13 @@ def _level_coords(self) -> Dict[str, Hashable]: level_coords.update({lname: dim for lname in level_names}) return level_coords - def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": + def _copy_listed(self, names: Iterable[Hashable]) -> Dataset: """Create a new Dataset with the listed variables from this dataset and the all relevant coordinates. Skips all validation. 
""" - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} coord_names = set() - indexes: Dict[Hashable, Index] = {} + indexes: dict[Hashable, Index] = {} for name in names: try: @@ -1394,7 +1390,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": return self._replace(variables, coord_names, dims, indexes=indexes) - def _construct_dataarray(self, name: Hashable) -> "DataArray": + def _construct_dataarray(self, name: Hashable) -> DataArray: """Construct a DataArray by indexing this dataset""" from .dataarray import DataArray @@ -1407,7 +1403,7 @@ def _construct_dataarray(self, name: Hashable) -> "DataArray": needed_dims = set(variable.dims) - coords: Dict[Hashable, Variable] = {} + coords: dict[Hashable, Variable] = {} # preserve ordering for k in self._variables: if k in self._coord_names and set(self.variables[k].dims) <= needed_dims: @@ -1420,10 +1416,10 @@ def _construct_dataarray(self, name: Hashable) -> "DataArray": return DataArray(variable, coords, name=name, indexes=indexes, fastpath=True) - def __copy__(self) -> "Dataset": + def __copy__(self) -> Dataset: return self.copy(deep=False) - def __deepcopy__(self, memo=None) -> "Dataset": + def __deepcopy__(self, memo=None) -> Dataset: # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) @@ -1482,15 +1478,15 @@ def loc(self) -> _LocIndexer: # FIXME https://github.com/python/mypy/issues/7328 @overload - def __getitem__(self, key: Mapping) -> "Dataset": # type: ignore[misc] + def __getitem__(self, key: Mapping) -> Dataset: # type: ignore[misc] ... @overload - def __getitem__(self, key: Hashable) -> "DataArray": # type: ignore[misc] + def __getitem__(self, key: Hashable) -> DataArray: # type: ignore[misc] ... @overload - def __getitem__(self, key: Any) -> "Dataset": + def __getitem__(self, key: Any) -> Dataset: ... def __getitem__(self, key): @@ -1507,7 +1503,7 @@ def __getitem__(self, key): else: return self._copy_listed(key) - def __setitem__(self, key: Union[Hashable, List[Hashable], Mapping], value) -> None: + def __setitem__(self, key: Hashable | list[Hashable] | Mapping, value) -> None: """Add an array to this dataset. Multiple arrays can be added at the same time, in which case each of the following operations is applied to the respective value. @@ -1616,7 +1612,7 @@ def _setitem_check(self, key, value): f"Variable '{name}': dimension '{dim}' appears in new values " f"but not in the indexed original data" ) - dims = tuple([dim for dim in var_k.dims if dim in val.dims]) + dims = tuple(dim for dim in var_k.dims if dim in val.dims) if dims != val.dims: raise ValueError( f"Variable '{name}': dimension order differs between" @@ -1647,7 +1643,7 @@ def __delitem__(self, key: Hashable) -> None: # https://github.com/python/mypy/issues/4266 __hash__ = None # type: ignore[assignment] - def _all_compat(self, other: "Dataset", compat_str: str) -> bool: + def _all_compat(self, other: Dataset, compat_str: str) -> bool: """Helper function for equals and identical""" # some stores (e.g., scipy) do not seem to preserve order, so don't @@ -1659,7 +1655,7 @@ def compat(x: Variable, y: Variable) -> bool: self._variables, other._variables, compat=compat ) - def broadcast_equals(self, other: "Dataset") -> bool: + def broadcast_equals(self, other: Dataset) -> bool: """Two Datasets are broadcast equal if they are equal after broadcasting all variables against each other. 
@@ -1677,7 +1673,7 @@ def broadcast_equals(self, other: "Dataset") -> bool: except (TypeError, AttributeError): return False - def equals(self, other: "Dataset") -> bool: + def equals(self, other: Dataset) -> bool: """Two Datasets are equal if they have matching variables and coordinates, all of which are equal. @@ -1697,7 +1693,7 @@ def equals(self, other: "Dataset") -> bool: except (TypeError, AttributeError): return False - def identical(self, other: "Dataset") -> bool: + def identical(self, other: Dataset) -> bool: """Like equals, but also checks all dataset attributes and the attributes on all variables and coordinates. @@ -1746,7 +1742,7 @@ def data_vars(self) -> DataVariables: """Dictionary of DataArray objects corresponding to data variables""" return DataVariables(self) - def set_coords(self, names: "Union[Hashable, Iterable[Hashable]]") -> "Dataset": + def set_coords(self, names: Hashable | Iterable[Hashable]) -> Dataset: """Given names of one or more variables, set them as coordinates Parameters @@ -1777,9 +1773,9 @@ def set_coords(self, names: "Union[Hashable, Iterable[Hashable]]") -> "Dataset": def reset_coords( self, - names: "Union[Hashable, Iterable[Hashable], None]" = None, + names: Hashable | Iterable[Hashable] | None = None, drop: bool = False, - ) -> "Dataset": + ) -> Dataset: """Given names of coordinates, reset them to become variables Parameters @@ -1815,7 +1811,7 @@ def reset_coords( del obj._variables[name] return obj - def dump_to_store(self, store: "AbstractDataStore", **kwargs) -> None: + def dump_to_store(self, store: AbstractDataStore, **kwargs) -> None: """Store dataset contents to a backends.*DataStore object.""" from ..backends.api import dump_to_store @@ -1834,7 +1830,7 @@ def to_netcdf( unlimited_dims: Iterable[Hashable] = None, compute: bool = True, invalid_netcdf: bool = False, - ) -> Union[bytes, "Delayed", None]: + ) -> bytes | Delayed | None: """Write dataset contents to a netCDF file. Parameters @@ -1921,19 +1917,19 @@ def to_netcdf( def to_zarr( self, - store: Union[MutableMapping, str, PathLike] = None, - chunk_store: Union[MutableMapping, str, PathLike] = None, + store: MutableMapping | str | PathLike | None = None, + chunk_store: MutableMapping | str | PathLike | None = None, mode: str = None, synchronizer=None, group: str = None, encoding: Mapping = None, compute: bool = True, - consolidated: Optional[bool] = None, + consolidated: bool | None = None, append_dim: Hashable = None, region: Mapping[str, slice] = None, safe_chunks: bool = True, - storage_options: Dict[str, str] = None, - ) -> "ZarrStore": + storage_options: dict[str, str] = None, + ) -> ZarrStore: """Write dataset contents to a zarr group. Zarr chunks are determined in the following way: @@ -2101,7 +2097,7 @@ def info(self, buf=None) -> None: buf.write("\n".join(lines)) @property - def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: + def chunks(self) -> Mapping[Hashable, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataset's data, or None if the underlying data is not a dask array. @@ -2118,7 +2114,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: return get_chunksizes(self.variables.values()) @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataset's data, or None if the underlying data is not a dask array. 
@@ -2136,15 +2132,13 @@ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: def chunk( self, - chunks: Union[ - int, - Literal["auto"], - Mapping[Any, Union[None, int, str, Tuple[int, ...]]], - ] = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) + chunks: ( + int | Literal["auto"] | Mapping[Any, None | int | str | tuple[int, ...]] + ) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", token: str = None, lock: bool = False, - ) -> "Dataset": + ) -> Dataset: """Coerce all arrays in this dataset into dask arrays with the given chunks. @@ -2203,7 +2197,7 @@ def chunk( def _validate_indexers( self, indexers: Mapping[Any, Any], missing_dims: str = "raise" - ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: + ) -> Iterator[tuple[Hashable, int | slice | np.ndarray | Variable]]: """Here we make sure + indexer has a valid keys + indexer is in a valid data type @@ -2247,7 +2241,7 @@ def _validate_indexers( def _validate_interp_indexers( self, indexers: Mapping[Any, Any] - ) -> Iterator[Tuple[Hashable, Variable]]: + ) -> Iterator[tuple[Hashable, Variable]]: """Variant of _validate_indexers to be used for interpolation""" for k, v in self._validate_indexers(indexers): if isinstance(v, Variable): @@ -2311,7 +2305,7 @@ def isel( drop: bool = False, missing_dims: str = "raise", **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed along the specified dimension(s). @@ -2366,7 +2360,7 @@ def isel( indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) variables = {} - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} coord_names = self._coord_names.copy() indexes = self._indexes.copy() if self._indexes is not None else None @@ -2403,13 +2397,13 @@ def _isel_fancy( *, drop: bool, missing_dims: str = "raise", - ) -> "Dataset": + ) -> Dataset: # Note: we need to preserve the original indexers variable in order to merge the # coords below indexers_list = list(self._validate_indexers(indexers, missing_dims)) - variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, Index] = {} + variables: dict[Hashable, Variable] = {} + indexes: dict[Hashable, Index] = {} for name, var in self.variables.items(): var_indexers = {k: v for k, v in indexers_list if k in var.dims} @@ -2446,7 +2440,7 @@ def sel( tolerance: Number = None, drop: bool = False, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed by tick labels along the specified dimension(s). @@ -2520,9 +2514,9 @@ def sel( def head( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with the first `n` values of each array for the specified dimension(s). @@ -2566,9 +2560,9 @@ def head( def tail( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with the last `n` values of each array for the specified dimension(s). 
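The ``chunk`` signature a few hunks above keeps its ``chunks={}`` default, together with the comment that an empty-dict default is technically unsafe but intentional (#4667). A short sketch of the pitfall that comment alludes to, and of why a default that is only read is harmless; both functions are illustrative, not xarray API:

def append_bad(item, seen=[]):
    # The single default list is created once and shared by every call,
    # so state leaks between calls that rely on the default.
    seen.append(item)
    return seen


def lookup_ok(key, table={}):
    # Also a shared default, but it is only ever read, never mutated,
    # which is presumably why the dict default is considered safe above.
    return table.get(key, "missing")


assert append_bad(1) == [1]
assert append_bad(2) == [1, 2]   # the earlier call is still visible
assert lookup_ok("x") == "missing"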
@@ -2615,9 +2609,9 @@ def tail( def thin( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed along every `n`-th value for the specified dimension(s) @@ -2663,8 +2657,8 @@ def thin( return self.isel(indexers_slices) def broadcast_like( - self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None - ) -> "Dataset": + self, other: Dataset | DataArray, exclude: Iterable[Hashable] = None + ) -> Dataset: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -2688,12 +2682,12 @@ def broadcast_like( def reindex_like( self, - other: Union["Dataset", "DataArray"], + other: Dataset | DataArray, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, - ) -> "Dataset": + ) -> Dataset: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -2755,11 +2749,11 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Conform this object onto a new set of indexes, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -2969,12 +2963,12 @@ def _reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """ same to _reindex but support sparse option """ @@ -3007,7 +3001,7 @@ def interp( kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", **coords_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Multidimensional interpolation of Dataset. Parameters @@ -3189,8 +3183,8 @@ def _validate_interp_indexer(x, new_x): for k, (index, dest) in validated_indexers.items() } - variables: Dict[Hashable, Variable] = {} - to_reindex: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} + to_reindex: dict[Hashable, Variable] = {} for name, var in obj._variables.items(): if name in indexers: continue @@ -3255,12 +3249,12 @@ def _validate_interp_indexer(x, new_x): def interp_like( self, - other: Union["Dataset", "DataArray"], + other: Dataset | DataArray, method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", - ) -> "Dataset": + ) -> Dataset: """Interpolate this object onto the coordinates of another object, filling the out of range values with NaN. 
@@ -3306,8 +3300,8 @@ def interp_like( kwargs = {} coords = alignment.reindex_like_indexers(self, other) - numeric_coords: Dict[Hashable, pd.Index] = {} - object_coords: Dict[Hashable, pd.Index] = {} + numeric_coords: dict[Hashable, pd.Index] = {} + object_coords: dict[Hashable, pd.Index] = {} for k, v in coords.items(): if v.dtype.kind in "uifcMm": numeric_coords[k] = v @@ -3373,7 +3367,7 @@ def rename( self, name_dict: Mapping[Any, Hashable] = None, **names: Hashable, - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed variables and dimensions. Parameters @@ -3413,7 +3407,7 @@ def rename( def rename_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims: Hashable - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed dimensions only. Parameters @@ -3458,7 +3452,7 @@ def rename_dims( def rename_vars( self, name_dict: Mapping[Any, Hashable] = None, **names: Hashable - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed variables including coordinates Parameters @@ -3496,7 +3490,7 @@ def rename_vars( def swap_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs - ) -> "Dataset": + ) -> Dataset: """Returns a new object with swapped dimensions. Parameters @@ -3576,8 +3570,8 @@ def swap_dims( coord_names = self._coord_names.copy() coord_names.update({dim for dim in dims_dict.values() if dim in self.variables}) - variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, Index] = {} + variables: dict[Hashable, Variable] = {} + indexes: dict[Hashable, Index] = {} for k, v in self.variables.items(): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: @@ -3602,10 +3596,10 @@ def swap_dims( def expand_dims( self, - dim: Union[None, Hashable, Sequence[Hashable], Mapping[Any, Any]] = None, - axis: Union[None, int, Sequence[int]] = None, + dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, + axis: None | int | Sequence[int] = None, **dim_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Return a new object with an additional axis (or axes) inserted at the corresponding position in the array shape. The new object is a view into the underlying array, not a copy. @@ -3675,7 +3669,7 @@ def expand_dims( " variable name.".format(dim=d) ) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} coord_names = self._coord_names.copy() # If dim is a dict, then ensure that the values are either integers # or iterables. @@ -3734,10 +3728,10 @@ def expand_dims( def set_index( self, - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, append: bool = False, - **indexes_kwargs: Union[Hashable, Sequence[Hashable]], - ) -> "Dataset": + **indexes_kwargs: Hashable | Sequence[Hashable], + ) -> Dataset: """Set Dataset (multi-)indexes using one or more existing coordinates or variables. @@ -3798,9 +3792,9 @@ def set_index( def reset_index( self, - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], drop: bool = False, - ) -> "Dataset": + ) -> Dataset: """Reset the specified index(es) or multi-index level(s). Parameters @@ -3834,7 +3828,7 @@ def reorder_levels( self, dim_order: Mapping[Any, Sequence[int]] = None, **dim_order_kwargs: Sequence[int], - ) -> "Dataset": + ) -> Dataset: """Rearrange index levels using input order. 
Parameters @@ -3905,7 +3899,7 @@ def stack( self, dimensions: Mapping[Any, Sequence[Hashable]] = None, **dimensions_kwargs: Sequence[Hashable], - ) -> "Dataset": + ) -> Dataset: """ Stack any number of existing dimensions into a single new dimension. @@ -3945,7 +3939,7 @@ def to_stacked_array( sample_dims: Collection, variable_dim: Hashable = "variable", name: Hashable = None, - ) -> "DataArray": + ) -> DataArray: """Combine variables of differing dimensionality into a DataArray without broadcasting. @@ -4062,13 +4056,11 @@ def ensure_stackable(val): return data_array - def _unstack_once( - self, dim: Hashable, fill_value, sparse: bool = False - ) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value, sparse: bool = False) -> Dataset: index = self.get_index(dim) index = remove_unused_levels_categories(index) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in self.variables.items(): @@ -4096,9 +4088,7 @@ def _unstack_once( variables, coord_names=coord_names, indexes=indexes ) - def _unstack_full_reindex( - self, dim: Hashable, fill_value, sparse: bool - ) -> "Dataset": + def _unstack_full_reindex(self, dim: Hashable, fill_value, sparse: bool) -> Dataset: index = self.get_index(dim) index = remove_unused_levels_categories(index) full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -4114,7 +4104,7 @@ def _unstack_full_reindex( new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in obj.variables.items(): @@ -4138,10 +4128,10 @@ def _unstack_full_reindex( def unstack( self, - dim: Union[Hashable, Iterable[Hashable]] = None, + dim: Hashable | Iterable[Hashable] = None, fill_value: Any = dtypes.NA, sparse: bool = False, - ) -> "Dataset": + ) -> Dataset: """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -4228,7 +4218,7 @@ def unstack( result = result._unstack_once(dim, fill_value, sparse) return result - def update(self, other: "CoercibleMapping") -> "Dataset": + def update(self, other: CoercibleMapping) -> Dataset: """Update this dataset's variables with those from another dataset. Just like :py:meth:`dict.update` this is a in-place operation. @@ -4269,13 +4259,13 @@ def update(self, other: "CoercibleMapping") -> "Dataset": def merge( self, - other: Union["CoercibleMapping", "DataArray"], - overwrite_vars: Union[Hashable, Iterable[Hashable]] = frozenset(), + other: CoercibleMapping | DataArray, + overwrite_vars: Hashable | Iterable[Hashable] = frozenset(), compat: str = "no_conflicts", join: str = "outer", fill_value: Any = dtypes.NA, combine_attrs: str = "override", - ) -> "Dataset": + ) -> Dataset: """Merge the arrays of two datasets into a single dataset. This method generally does not allow for overriding data, with the @@ -4366,8 +4356,8 @@ def _assert_all_in_dataset( ) def drop_vars( - self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "Dataset": + self, names: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> Dataset: """Drop variables from this dataset. 
Parameters @@ -4583,8 +4573,8 @@ def drop_isel(self, indexers=None, **indexers_kwargs): return ds def drop_dims( - self, drop_dims: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "Dataset": + self, drop_dims: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> Dataset: """Drop dimensions and associated variables from this dataset. Parameters @@ -4624,7 +4614,7 @@ def transpose( self, *dims: Hashable, missing_dims: str = "raise", - ) -> "Dataset": + ) -> Dataset: """Return a new Dataset object with all array dimensions transposed. Although the order of dimensions on each array will change, the dataset @@ -4730,7 +4720,7 @@ def dropna( return self.isel({dim: mask}) - def fillna(self, value: Any) -> "Dataset": + def fillna(self, value: Any) -> Dataset: """Fill missing values in this object. This operation follows the normal broadcasting and alignment rules that @@ -4815,12 +4805,12 @@ def interpolate_na( dim: Hashable = None, method: str = "linear", limit: int = None, - use_coordinate: Union[bool, Hashable] = True, - max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta - ] = None, + use_coordinate: bool | Hashable = True, + max_gap: ( + int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta + ) = None, **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Fill in NaNs by interpolating according to different methods. Parameters @@ -4943,7 +4933,7 @@ def interpolate_na( ) return new - def ffill(self, dim: Hashable, limit: int = None) -> "Dataset": + def ffill(self, dim: Hashable, limit: int = None) -> Dataset: """Fill NaN values by propogating values forward *Requires bottleneck.* @@ -4969,7 +4959,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> "Dataset": new = _apply_over_vars_with_dim(ffill, self, dim=dim, limit=limit) return new - def bfill(self, dim: Hashable, limit: int = None) -> "Dataset": + def bfill(self, dim: Hashable, limit: int = None) -> Dataset: """Fill NaN values by propogating values backward *Requires bottleneck.* @@ -4995,7 +4985,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "Dataset": new = _apply_over_vars_with_dim(bfill, self, dim=dim, limit=limit) return new - def combine_first(self, other: "Dataset") -> "Dataset": + def combine_first(self, other: Dataset) -> Dataset: """Combine two Datasets, default to data_vars of self. The new coordinates follow the normal broadcasting and alignment rules @@ -5017,12 +5007,12 @@ def combine_first(self, other: "Dataset") -> "Dataset": def reduce( self, func: Callable, - dim: Union[Hashable, Iterable[Hashable]] = None, + dim: Hashable | Iterable[Hashable] = None, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Reduce this dataset by applying `func` along some dimension(s). 
Parameters @@ -5075,7 +5065,7 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} for name, var in self._variables.items(): reduce_dims = [d for d in var.dims if d in dims] if name in self.coords: @@ -5120,7 +5110,7 @@ def map( keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Apply a function to each variable in this dataset Parameters @@ -5180,7 +5170,7 @@ def apply( keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """ Backward compatible implementation of ``map`` @@ -5197,7 +5187,7 @@ def apply( def assign( self, variables: Mapping[Any, Any] = None, **variables_kwargs: Hashable - ) -> "Dataset": + ) -> Dataset: """Assign new data variables to a Dataset, returning a new object with all the original variables in addition to the new ones. @@ -5322,8 +5312,8 @@ def to_array(self, dim="variable", name=None): ) def _normalize_dim_order( - self, dim_order: List[Hashable] = None - ) -> Dict[Hashable, int]: + self, dim_order: list[Hashable] = None + ) -> dict[Hashable, int]: """ Check the validity of the provided dimensions if any and return the mapping between dimension name and their size. @@ -5351,7 +5341,7 @@ def _normalize_dim_order( return ordered_dims - def to_pandas(self) -> Union[pd.Series, pd.DataFrame]: + def to_pandas(self) -> pd.Series | pd.DataFrame: """Convert this dataset into a pandas object without changing the number of dimensions. The type of the returned object depends on the number of Dataset @@ -5381,7 +5371,7 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]): index = self.coords.to_index([*ordered_dims]) return pd.DataFrame(dict(zip(columns, data)), index=index) - def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: + def to_dataframe(self, dim_order: list[Hashable] = None) -> pd.DataFrame: """Convert this dataset into a pandas.DataFrame. Non-index variables in this dataset form the columns of the @@ -5413,7 +5403,7 @@ def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: return self._to_dataframe(ordered_dims=ordered_dims) def _set_sparse_data_from_dataframe( - self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple + self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple ) -> None: from sparse import COO @@ -5445,7 +5435,7 @@ def _set_sparse_data_from_dataframe( self[name] = (dims, data) def _set_numpy_data_from_dataframe( - self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple + self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple ) -> None: if not isinstance(idx, pd.MultiIndex): for name, values in arrays: @@ -5482,7 +5472,7 @@ def _set_numpy_data_from_dataframe( self[name] = (dims, data) @classmethod - def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Dataset": + def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Dataset: """Convert a pandas.DataFrame into an xarray.Dataset Each column will be converted into an independent variable in the @@ -5918,7 +5908,7 @@ def shift( shifts: Mapping[Hashable, int] = None, fill_value: Any = dtypes.NA, **shifts_kwargs: int, - ) -> "Dataset": + ) -> Dataset: """Shift this dataset by an offset along one or more dimensions. 
@@ -5988,7 +5978,7 @@ def roll( shifts: Mapping[Hashable, int] = None, roll_coords: bool = False, **shifts_kwargs: int, - ) -> "Dataset": + ) -> Dataset: """Roll this dataset by an offset along one or more dimensions. Unlike shift, roll treats the given dimensions as periodic, so will not @@ -6056,7 +6046,7 @@ def roll( variables[k] = var if roll_coords: - indexes: Dict[Hashable, Index] = {} + indexes: dict[Hashable, Index] = {} idx: pd.Index for k, idx in self.xindexes.items(): (dim,) = self.variables[k].dims @@ -6408,9 +6398,9 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): def integrate( self, - coord: Union[Hashable, Sequence[Hashable]], + coord: Hashable | Sequence[Hashable], datetime_unit: str = None, - ) -> "Dataset": + ) -> Dataset: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -6524,9 +6514,9 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): def cumulative_integrate( self, - coord: Union[Hashable, Sequence[Hashable]], + coord: Hashable | Sequence[Hashable], datetime_unit: str = None, - ) -> "Dataset": + ) -> Dataset: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -6702,7 +6692,7 @@ def filter_by_attrs(self, **kwargs): selection.append(var_name) return self[selection] - def unify_chunks(self) -> "Dataset": + def unify_chunks(self) -> Dataset: """Unify chunk size along all chunked dimensions of this Dataset. Returns @@ -6718,11 +6708,11 @@ def unify_chunks(self) -> "Dataset": def map_blocks( self, - func: "Callable[..., T_Xarray]", + func: Callable[..., T_Xarray], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, - template: Union["DataArray", "Dataset"] = None, - ) -> "T_Xarray": + template: DataArray | Dataset | None = None, + ) -> T_Xarray: """ Apply a function to each block of this Dataset. @@ -6829,9 +6819,9 @@ def polyfit( deg: int, skipna: bool = None, rcond: float = None, - w: Union[Hashable, Any] = None, + w: Hashable | Any = None, full: bool = False, - cov: Union[bool, str] = False, + cov: bool | str = False, ): """ Least squares polynomial fit. 
@@ -6895,7 +6885,7 @@ def polyfit( skipna_da = skipna x = get_clean_interp_index(self, dim, strict=False) - xname = "{}_".format(self[dim].name) + xname = f"{self[dim].name}_" order = int(deg) + 1 lhs = np.vander(x, order) @@ -6912,7 +6902,7 @@ def polyfit( if w.ndim != 1: raise TypeError("Expected a 1-d array for weights.") if w.shape[0] != lhs.shape[0]: - raise TypeError("Expected w and {} to have the same length".format(dim)) + raise TypeError(f"Expected w and {dim} to have the same length") lhs *= w[:, np.newaxis] # Scaling @@ -6949,7 +6939,7 @@ def polyfit( skipna_da = bool(np.any(da.isnull())) dims_to_stack = [dimname for dimname in da.dims if dimname != dim] - stacked_coords: Dict[Hashable, DataArray] = {} + stacked_coords: dict[Hashable, DataArray] = {} if dims_to_stack: stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") rhs = da.transpose(dim, *dims_to_stack).stack( @@ -6975,7 +6965,7 @@ def polyfit( ) if isinstance(name, str): - name = "{}_".format(name) + name = f"{name}_" else: # Thus a ReprObject => polyfit was called on a DataArray name = "" @@ -7019,16 +7009,19 @@ def polyfit( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: ( + int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None + ) = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, reflect_type: str = None, **pad_width_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Pad this dataset along one or more dimensions. .. warning:: @@ -7175,7 +7168,7 @@ def idxmin( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "Dataset": + ) -> Dataset: """Return the coordinate label of the minimum value along a dimension. Returns a new `Dataset` named after the dimension with the values of @@ -7272,7 +7265,7 @@ def idxmax( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "Dataset": + ) -> Dataset: """Return the coordinate label of the maximum value along a dimension. Returns a new `Dataset` named after the dimension with the values of @@ -7485,7 +7478,7 @@ def query( engine: str = None, missing_dims: str = "raise", **queries_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Return a new dataset with each array indexed along the specified dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the data variables in the @@ -7576,14 +7569,14 @@ def query( def curvefit( self, - coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], + coords: str | DataArray | Iterable[str | DataArray], func: Callable[..., Any], - reduce_dims: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Hashable | Iterable[Hashable] = None, skipna: bool = True, - p0: Dict[str, Any] = None, - bounds: Dict[str, Any] = None, + p0: dict[str, Any] = None, + bounds: dict[str, Any] = None, param_names: Sequence[str] = None, - kwargs: Dict[str, Any] = None, + kwargs: dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. 
@@ -7753,10 +7746,10 @@ def convert_calendar( self, calendar: str, dim: str = "time", - align_on: Optional[str] = None, - missing: Optional[Any] = None, - use_cftime: Optional[bool] = None, - ) -> "Dataset": + align_on: str | None = None, + missing: Any | None = None, + use_cftime: bool | None = None, + ) -> Dataset: """Convert the Dataset to another calendar. Only converts the individual timestamps, does not modify any data except @@ -7874,9 +7867,9 @@ def convert_calendar( def interp_calendar( self, - target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + target: pd.DatetimeIndex | CFTimeIndex | DataArray, dim: str = "time", - ) -> "Dataset": + ) -> Dataset: """Interpolates the Dataset to another calendar based on decimal year measure. Each timestamp in `source` and `target` are first converted to their decimal diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c0633064231..2a9f8a27815 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -306,7 +306,7 @@ def summarize_variable( def _summarize_coord_multiindex(coord, col_width, marker): first_col = pretty_print(f" {marker} {coord.name} ", col_width) - return "{}({}) MultiIndex".format(first_col, str(coord.dims[0])) + return f"{first_col}({str(coord.dims[0])}) MultiIndex" def _summarize_coord_levels(coord, col_width, marker="-"): @@ -622,7 +622,7 @@ def array_repr(arr): def dataset_repr(ds): - summary = ["".format(type(ds).__name__)] + summary = [f""] col_width = _calculate_col_width(_get_col_items(ds.variables)) max_rows = OPTIONS["display_max_rows"] diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 072a932b943..36c252f276e 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -266,7 +266,7 @@ def _obj_repr(obj, header_components, sections): def array_repr(arr): dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape)) - obj_type = "xarray.{}".format(type(arr).__name__) + obj_type = f"xarray.{type(arr).__name__}" arr_name = f"'{arr.name}'" if getattr(arr, "name", None) else "" coord_names = list(arr.coords) if hasattr(arr, "coords") else [] @@ -287,7 +287,7 @@ def array_repr(arr): def dataset_repr(ds): - obj_type = "xarray.{}".format(type(ds).__name__) + obj_type = f"xarray.{type(ds).__name__}" header_components = [f"
<div class='xr-obj-type'>{escape(obj_type)}</div>
"] diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 1ded35264f4..844751f24bb 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -296,7 +296,7 @@ def from_variables(cls, variables: Mapping[Any, "Variable"]): if any([var.ndim != 1 for var in variables.values()]): raise ValueError("PandasMultiIndex only accepts 1-dimensional variables") - dims = set([var.dims for var in variables.values()]) + dims = {var.dims for var in variables.values()} if len(dims) != 1: raise ValueError( "unmatched dimensions for variables " diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c93d797266b..581572cd0e1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -579,7 +579,7 @@ def as_indexable(array): if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) - raise TypeError("Invalid array type: {}".format(type(array))) + raise TypeError(f"Invalid array type: {type(array)}") def _outer_to_vectorized_indexer(key, shape): @@ -1051,7 +1051,7 @@ def create_mask(indexer, shape, data=None): mask = any(k == -1 for k in indexer.tuple) else: - raise TypeError("unexpected key type: {}".format(type(indexer))) + raise TypeError(f"unexpected key type: {type(indexer)}") return mask @@ -1149,7 +1149,7 @@ def _indexing_array_and_key(self, key): # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). key = key.tuple + (Ellipsis,) else: - raise TypeError("unexpected key type: {}".format(type(key))) + raise TypeError(f"unexpected key type: {type(key)}") return array, key diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 460e02ae10f..d5307678f89 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -4,15 +4,12 @@ TYPE_CHECKING, AbstractSet, Any, - Dict, Hashable, Iterable, - List, Mapping, NamedTuple, Optional, Sequence, - Set, Tuple, Union, ) @@ -66,12 +63,12 @@ def __init__(self, func): self.func = func -def broadcast_dimension_size(variables: List[Variable]) -> Dict[Hashable, int]: +def broadcast_dimension_size(variables: list[Variable]) -> dict[Hashable, int]: """Extract dimension sizes from a dictionary of variables. Raises ValueError if any dimensions have different sizes. """ - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} for var in variables: for dim, size in zip(var.dims, var.shape): if dim in dims and size != dims[dim]: @@ -89,7 +86,7 @@ class MergeError(ValueError): def unique_variable( name: Hashable, - variables: List[Variable], + variables: list[Variable], compat: str = "broadcast_equals", equals: bool = None, ) -> Variable: @@ -162,20 +159,18 @@ def unique_variable( def _assert_compat_valid(compat): if compat not in _VALID_COMPAT: - raise ValueError( - "compat={!r} invalid: must be {}".format(compat, set(_VALID_COMPAT)) - ) + raise ValueError(f"compat={compat!r} invalid: must be {set(_VALID_COMPAT)}") MergeElement = Tuple[Variable, Optional[Index]] def merge_collected( - grouped: Dict[Hashable, List[MergeElement]], + grouped: dict[Hashable, list[MergeElement]], prioritized: Mapping[Any, MergeElement] = None, compat: str = "minimal", combine_attrs="override", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge dicts of variables, while resolving conflicts appropriately. 
Parameters @@ -196,8 +191,8 @@ def merge_collected( _assert_compat_valid(compat) - merged_vars: Dict[Hashable, Variable] = {} - merged_indexes: Dict[Hashable, Index] = {} + merged_vars: dict[Hashable, Variable] = {} + merged_indexes: dict[Hashable, Index] = {} for name, elements_list in grouped.items(): if name in prioritized: @@ -255,8 +250,8 @@ def merge_collected( def collect_variables_and_indexes( - list_of_mappings: List[DatasetLike], -) -> Dict[Hashable, List[MergeElement]]: + list_of_mappings: list[DatasetLike], +) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. Mappings must either be Dataset objects, or have values of one of the @@ -269,7 +264,7 @@ def collect_variables_and_indexes( from .dataarray import DataArray from .dataset import Dataset - grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} + grouped: dict[Hashable, list[tuple[Variable, Index | None]]] = {} def append(name, variable, index): values = grouped.setdefault(name, []) @@ -307,10 +302,10 @@ def append_all(variables, indexes): def collect_from_coordinates( - list_of_coords: "List[Coordinates]", -) -> Dict[Hashable, List[MergeElement]]: + list_of_coords: list[Coordinates], +) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" - grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} + grouped: dict[Hashable, list[tuple[Variable, Index | None]]] = {} for coords in list_of_coords: variables = coords.variables @@ -322,11 +317,11 @@ def collect_from_coordinates( def merge_coordinates_without_align( - objects: "List[Coordinates]", + objects: list[Coordinates], prioritized: Mapping[Any, MergeElement] = None, exclude_dims: AbstractSet = frozenset(), combine_attrs: str = "override", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge variables/indexes from coordinates without automatic alignments. This function is used for merging coordinate from pre-existing xarray @@ -335,7 +330,7 @@ def merge_coordinates_without_align( collected = collect_from_coordinates(objects) if exclude_dims: - filtered: Dict[Hashable, List[MergeElement]] = {} + filtered: dict[Hashable, list[MergeElement]] = {} for name, elements in collected.items(): new_elements = [ (variable, index) @@ -351,8 +346,8 @@ def merge_coordinates_without_align( def determine_coords( - list_of_mappings: Iterable["DatasetLike"], -) -> Tuple[Set[Hashable], Set[Hashable]]: + list_of_mappings: Iterable[DatasetLike], +) -> tuple[set[Hashable], set[Hashable]]: """Given a list of dicts with xarray object values, identify coordinates. Parameters @@ -370,8 +365,8 @@ def determine_coords( from .dataarray import DataArray from .dataset import Dataset - coord_names: Set[Hashable] = set() - noncoord_names: Set[Hashable] = set() + coord_names: set[Hashable] = set() + noncoord_names: set[Hashable] = set() for mapping in list_of_mappings: if isinstance(mapping, Dataset): @@ -388,7 +383,7 @@ def determine_coords( return coord_names, noncoord_names -def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["DatasetLike"]: +def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLike]: """Convert pandas values found in a list of labeled objects. 
Parameters @@ -408,7 +403,7 @@ def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["Dataset out = [] for obj in objects: if isinstance(obj, Dataset): - variables: "DatasetLike" = obj + variables: DatasetLike = obj else: variables = {} if isinstance(obj, PANDAS_TYPES): @@ -422,8 +417,8 @@ def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["Dataset def _get_priority_vars_and_indexes( - objects: List["DatasetLike"], priority_arg: Optional[int], compat: str = "equals" -) -> Dict[Hashable, MergeElement]: + objects: list[DatasetLike], priority_arg: int | None, compat: str = "equals" +) -> dict[Hashable, MergeElement]: """Extract the priority variable from a list of mappings. We need this method because in some cases the priority argument itself @@ -448,20 +443,20 @@ def _get_priority_vars_and_indexes( collected = collect_variables_and_indexes([objects[priority_arg]]) variables, indexes = merge_collected(collected, compat=compat) - grouped: Dict[Hashable, MergeElement] = {} + grouped: dict[Hashable, MergeElement] = {} for name, variable in variables.items(): grouped[name] = (variable, indexes.get(name)) return grouped def merge_coords( - objects: Iterable["CoercibleMapping"], + objects: Iterable[CoercibleMapping], compat: str = "minimal", join: str = "outer", - priority_arg: Optional[int] = None, - indexes: Optional[Mapping[Any, Index]] = None, + priority_arg: int | None = None, + indexes: Mapping[Any, Index] | None = None, fill_value: object = dtypes.NA, -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge coordinate variables. See merge_core below for argument descriptions. This works similarly to @@ -568,21 +563,21 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): class _MergeResult(NamedTuple): - variables: Dict[Hashable, Variable] - coord_names: Set[Hashable] - dims: Dict[Hashable, int] - indexes: Dict[Hashable, pd.Index] - attrs: Dict[Hashable, Any] + variables: dict[Hashable, Variable] + coord_names: set[Hashable] + dims: dict[Hashable, int] + indexes: dict[Hashable, pd.Index] + attrs: dict[Hashable, Any] def merge_core( - objects: Iterable["CoercibleMapping"], + objects: Iterable[CoercibleMapping], compat: str = "broadcast_equals", join: str = "outer", - combine_attrs: Optional[str] = "override", - priority_arg: Optional[int] = None, - explicit_coords: Optional[Sequence] = None, - indexes: Optional[Mapping[Any, Any]] = None, + combine_attrs: str | None = "override", + priority_arg: int | None = None, + explicit_coords: Sequence | None = None, + indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, ) -> _MergeResult: """Core logic for merging labeled objects. @@ -667,12 +662,12 @@ def merge_core( def merge( - objects: Iterable[Union["DataArray", "CoercibleMapping"]], + objects: Iterable[DataArray | CoercibleMapping], compat: str = "no_conflicts", join: str = "outer", fill_value: object = dtypes.NA, combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: """Merge any number of xarray objects into a single Dataset as variables. 
Parameters @@ -913,9 +908,9 @@ def merge( def dataset_merge_method( - dataset: "Dataset", - other: "CoercibleMapping", - overwrite_vars: Union[Hashable, Iterable[Hashable]], + dataset: Dataset, + other: CoercibleMapping, + overwrite_vars: Hashable | Iterable[Hashable], compat: str, join: str, fill_value: Any, @@ -938,8 +933,8 @@ def dataset_merge_method( objs = [dataset, other] priority_arg = 1 else: - other_overwrite: Dict[Hashable, CoercibleValue] = {} - other_no_overwrite: Dict[Hashable, CoercibleValue] = {} + other_overwrite: dict[Hashable, CoercibleValue] = {} + other_no_overwrite: dict[Hashable, CoercibleValue] = {} for k, v in other.items(): if k in overwrite_vars: other_overwrite[k] = v @@ -958,9 +953,7 @@ def dataset_merge_method( ) -def dataset_update_method( - dataset: "Dataset", other: "CoercibleMapping" -) -> _MergeResult: +def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeResult: """Guts of the Dataset.update method. This drops a duplicated coordinates from `other` if `other` is not an diff --git a/xarray/core/missing.py b/xarray/core/missing.py index acfbb032c23..2525272f719 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -721,7 +721,7 @@ def interp_func(var, x, new_x, method, kwargs): _, rechunked = da.unify_chunks(*args) - args = tuple([elem for pair in zip(rechunked, args[1::2]) for elem in pair]) + args = tuple(elem for pair in zip(rechunked, args[1::2]) for elem in pair) new_x = rechunked[1 + (len(rechunked) - 1) // 2 :] diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index aad1d285377..3f6bb34a36e 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -8,14 +8,10 @@ Any, Callable, DefaultDict, - Dict, Hashable, Iterable, - List, Mapping, Sequence, - Tuple, - Union, ) import numpy as np @@ -53,7 +49,7 @@ def assert_chunks_compatible(a: Dataset, b: Dataset): def check_result_variables( - result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str + result: DataArray | Dataset, expected: Mapping[str, Any], kind: str ): if kind == "coords": @@ -126,7 +122,7 @@ def make_meta(obj): def infer_template( - func: Callable[..., T_Xarray], obj: Union[DataArray, Dataset], *args, **kwargs + func: Callable[..., T_Xarray], obj: DataArray | Dataset, *args, **kwargs ) -> T_Xarray: """Infer return object by running the function on meta objects.""" meta_args = [make_meta(arg) for arg in (obj,) + args] @@ -148,7 +144,7 @@ def infer_template( return template -def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]: +def make_dict(x: DataArray | Dataset) -> dict[Hashable, Any]: """Map variable name to numpy(-like) data (Dataset.to_dict() is too complicated). """ @@ -167,10 +163,10 @@ def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping def map_blocks( func: Callable[..., T_Xarray], - obj: Union[DataArray, Dataset], + obj: DataArray | Dataset, args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, - template: Union[DataArray, Dataset] = None, + template: DataArray | Dataset | None = None, ) -> T_Xarray: """Apply a function to each block of a DataArray or Dataset. @@ -271,7 +267,7 @@ def map_blocks( def _wrapper( func: Callable, - args: List, + args: list, kwargs: dict, arg_is_array: Iterable[bool], expected: dict, @@ -415,8 +411,8 @@ def _wrapper( # for each variable in the dataset, which is the result of the # func applied to the values. 
- graph: Dict[Any, Any] = {} - new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict) + graph: dict[Any, Any] = {} + new_layers: DefaultDict[str, dict[Any, Any]] = collections.defaultdict(dict) gname = "{}-{}".format( dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs) ) @@ -516,14 +512,14 @@ def subset_dataset_to_block( graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected) # mapping from variable name to dask graph key - var_key_map: Dict[Hashable, str] = {} + var_key_map: dict[Hashable, str] = {} for name, variable in template.variables.items(): if name in indexes: continue gname_l = f"{name}-{gname}" var_key_map[name] = gname_l - key: Tuple[Any, ...] = (gname_l,) + key: tuple[Any, ...] = (gname_l,) for dim in variable.dims: if dim in chunk_index: key += (chunk_index[dim],) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 0cac9f2b129..0bc07c1aaeb 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -175,7 +175,7 @@ def _mapping_to_list( return [arg] else: raise ValueError( - "Mapping argument is necessary for {}d-rolling.".format(len(self.dim)) + f"Mapping argument is necessary for {len(self.dim)}d-rolling." ) def _get_keep_attrs(self, keep_attrs): @@ -803,7 +803,7 @@ def __repr__(self): """provide a nice str repr of our coarsen object""" attrs = [ - "{k}->{v}".format(k=k, v=getattr(self, k)) + f"{k}->{getattr(self, k)}" for k in self._attributes if getattr(self, k, None) is not None ] diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 7a8b0be9bd4..9fd097cd4dc 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Generic, Mapping, Union +from typing import Any, Generic, Mapping import numpy as np from packaging.version import Version @@ -101,7 +101,7 @@ class RollingExp(Generic[T_Xarray]): def __init__( self, obj: T_Xarray, - windows: Mapping[Any, Union[int, float]], + windows: Mapping[Any, int | float], window_type: str = "span", ): self.obj: T_Xarray = obj diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 68615eef74f..a9ea0acb267 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -468,7 +468,7 @@ def __contains__(self, key: object) -> bool: return key in self.mapping def __repr__(self) -> str: - return "{}({!r})".format(type(self).__name__, self.mapping) + return f"{type(self).__name__}({self.mapping!r})" def FrozenDict(*args, **kwargs) -> Frozen: @@ -544,7 +544,7 @@ def update(self, values: Iterable[T]) -> None: self._d[v] = None def __repr__(self) -> str: - return "{}({!r})".format(type(self).__name__, list(self)) + return f"{type(self).__name__}({list(self)!r})" class NdimSizeLenMixin: @@ -592,7 +592,7 @@ def __getitem__(self: Any, key): return self.array[key] def __repr__(self: Any) -> str: - return "{}(array={!r})".format(type(self).__name__, self.array) + return f"{type(self).__name__}(array={self.array!r})" class ReprObject: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 58aeceed3b1..08af2e694df 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -6,18 +6,7 @@ import warnings from collections import defaultdict from datetime import timedelta -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Hashable, - List, - Mapping, - Optional, - Sequence, - Tuple, - Union, -) +from typing import TYPE_CHECKING, Any, Hashable, Mapping, Sequence import numpy as np import pandas as pd @@ -80,7 +69,7 @@ class 
MissingDimensionsError(ValueError): # TODO: move this to an xarray.exceptions module? -def as_variable(obj, name=None) -> Union[Variable, IndexVariable]: +def as_variable(obj, name=None) -> Variable | IndexVariable: """Convert an object into a Variable. Parameters @@ -136,7 +125,7 @@ def as_variable(obj, name=None) -> Union[Variable, IndexVariable]: elif isinstance(obj, (pd.Index, IndexVariable)) and obj.name is not None: obj = Variable(obj.name, obj) elif isinstance(obj, (set, dict)): - raise TypeError("variable {!r} has invalid type {!r}".format(name, type(obj))) + raise TypeError(f"variable {name!r} has invalid type {type(obj)!r}") elif name is not None: data = as_compatible_data(obj) if data.ndim != 1: @@ -865,7 +854,7 @@ def __setitem__(self, key, value): indexable[index_tuple] = value @property - def attrs(self) -> Dict[Hashable, Any]: + def attrs(self) -> dict[Hashable, Any]: """Dictionary of local attributes on this variable.""" if self._attrs is None: self._attrs = {} @@ -999,7 +988,7 @@ def __deepcopy__(self, memo=None): __hash__ = None # type: ignore[assignment] @property - def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: + def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ Tuple of block lengths for this dataarray's data, in order of dimensions, or None if the underlying data is not a dask array. @@ -1013,7 +1002,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: return getattr(self._data, "chunks", None) @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this variable's data, or None if the underlying data is not a dask array. @@ -1282,7 +1271,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): def _pad_options_dim_to_index( self, - pad_option: Mapping[Any, Union[int, Tuple[int, int]]], + pad_option: Mapping[Any, int | tuple[int, int]], fill_with_shape=False, ): if fill_with_shape: @@ -1294,14 +1283,16 @@ def _pad_options_dim_to_index( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] | None = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - reflect_type: str = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]]) + | None = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, + reflect_type: str | None = None, **pad_width_kwargs: Any, ): """ @@ -1438,7 +1429,7 @@ def transpose( self, *dims, missing_dims: str = "raise", - ) -> "Variable": + ) -> Variable: """Return a new Variable object with transposed dimensions. 
Parameters @@ -1483,7 +1474,7 @@ def transpose( return self._replace(dims=dims, data=data) @property - def T(self) -> "Variable": + def T(self) -> Variable: return self.transpose() def set_dims(self, dims, shape=None): @@ -1535,7 +1526,7 @@ def set_dims(self, dims, shape=None): ) return expanded_var.transpose(*dims) - def _stack_once(self, dims: List[Hashable], new_dim: Hashable): + def _stack_once(self, dims: list[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): raise ValueError(f"invalid existing dimensions: {dims}") @@ -1593,7 +1584,7 @@ def stack(self, dimensions=None, **dimensions_kwargs): def _unstack_once_full( self, dims: Mapping[Any, int], old_dim: Hashable - ) -> "Variable": + ) -> Variable: """ Unstacks the variable without needing an index. @@ -1634,7 +1625,7 @@ def _unstack_once( dim: Hashable, fill_value=dtypes.NA, sparse: bool = False, - ) -> "Variable": + ) -> Variable: """ Unstacks this variable given an index to unstack and the name of the dimension to which the index refers. @@ -2109,9 +2100,7 @@ def rank(self, dim, pct=False): "prior to calling this method." ) elif not isinstance(data, np.ndarray): - raise TypeError( - "rank is not implemented for {} objects.".format(type(data)) - ) + raise TypeError(f"rank is not implemented for {type(data)} objects.") axis = self.get_axis_num(dim) func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata @@ -2455,11 +2444,11 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): def _unravel_argminmax( self, argminmax: str, - dim: Union[Hashable, Sequence[Hashable], None], - axis: Union[int, None], - keep_attrs: Optional[bool], - skipna: Optional[bool], - ) -> Union["Variable", Dict[Hashable, "Variable"]]: + dim: Hashable | Sequence[Hashable] | None, + axis: int | None, + keep_attrs: bool | None, + skipna: bool | None, + ) -> Variable | dict[Hashable, Variable]: """Apply argmin or argmax over one or more dimensions, returning the result as a dict of DataArray that can be passed directly to isel. """ @@ -2524,11 +2513,11 @@ def _unravel_argminmax( def argmin( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["Variable", Dict[Hashable, "Variable"]]: + ) -> Variable | dict[Hashable, Variable]: """Index or indices of the minimum of the Variable over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of Variables, which can be passed directly to isel(). If a single str is passed to 'dim' then @@ -2569,11 +2558,11 @@ def argmin( def argmax( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["Variable", Dict[Hashable, "Variable"]]: + ) -> Variable | dict[Hashable, Variable]: """Index or indices of the maximum of the Variable over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of Variables, which can be passed directly to isel(). 
If a single str is passed to 'dim' then @@ -2801,7 +2790,7 @@ def to_index(self): # set default names for multi-index unnamed levels so that # we can safely rename dimension / coordinate later valid_level_names = [ - name or "{}_level_{}".format(self.dims[0], i) + name or f"{self.dims[0]}_level_{i}" for i, name in enumerate(index.names) ] index = index.set_names(valid_level_names) diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py index c1aedd570bc..527ae121dcf 100644 --- a/xarray/plot/dataset_plot.py +++ b/xarray/plot/dataset_plot.py @@ -591,9 +591,9 @@ def streamplot(ds, x, y, ax, u, v, **kwargs): if len(ds[y].dims) == 1: ydim = ds[y].dims[0] if xdim is not None and ydim is None: - ydim = set(ds[y].dims) - set([xdim]) + ydim = set(ds[y].dims) - {xdim} if ydim is not None and xdim is None: - xdim = set(ds[x].dims) - set([ydim]) + xdim = set(ds[x].dims) - {ydim} x, y, u, v = broadcast(ds[x], ds[y], ds[u], ds[v]) diff --git a/xarray/testing.py b/xarray/testing.py index 40ca12852b9..4369b828daf 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -82,7 +82,7 @@ def assert_equal(a, b): elif isinstance(a, Dataset): assert a.equals(b), formatting.diff_dataset_repr(a, b, "equals") else: - raise TypeError("{} not supported by assertion comparison".format(type(a))) + raise TypeError(f"{type(a)} not supported by assertion comparison") @ensure_warnings @@ -113,7 +113,7 @@ def assert_identical(a, b): elif isinstance(a, (Dataset, Variable)): assert a.identical(b), formatting.diff_dataset_repr(a, b, "identical") else: - raise TypeError("{} not supported by assertion comparison".format(type(a))) + raise TypeError(f"{type(a)} not supported by assertion comparison") @ensure_warnings @@ -170,7 +170,7 @@ def compat_variable(a, b): ) assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv) else: - raise TypeError("{} not supported by assertion comparison".format(type(a))) + raise TypeError(f"{type(a)} not supported by assertion comparison") def _format_message(x, y, err_msg, verbose): diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bffac52e979..356335f47e6 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -743,9 +743,7 @@ def find_and_validate_array(obj): elif isinstance(obj.array, pd.Index): assert isinstance(obj, indexing.PandasIndexingAdapter) else: - raise TypeError( - "{} is wrapped by {}".format(type(obj.array), type(obj)) - ) + raise TypeError(f"{type(obj.array)} is wrapped by {type(obj)}") for k, v in ds.variables.items(): find_and_validate_array(v._data) @@ -1195,7 +1193,7 @@ def test_multiindex_not_implemented(self): @contextlib.contextmanager def create_tmp_file(suffix=".nc", allow_cleanup_failure=False): temp_dir = tempfile.mkdtemp() - path = os.path.join(temp_dir, "temp-{}{}".format(next(_counter), suffix)) + path = os.path.join(temp_dir, f"temp-{next(_counter)}{suffix}") try: yield path finally: @@ -4222,8 +4220,8 @@ def create_tmp_geotiff( transform = from_origin(*transform_args) if additional_attrs is None: additional_attrs = { - "descriptions": tuple("d{}".format(n + 1) for n in range(nz)), - "units": tuple("u{}".format(n + 1) for n in range(nz)), + "descriptions": tuple(f"d{n + 1}" for n in range(nz)), + "units": tuple(f"u{n + 1}" for n in range(nz)), } with rasterio.open( tmp_file, diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index a8d06188844..8a37df62261 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -7,7 +7,6 @@ from xarray import 
DataArray, Dataset, Variable, concat from xarray.core import dtypes, merge -from xarray.core.concat import compat_options, concat_options from . import ( InaccessibleArray, diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 4d1eee6363d..472192d3a9e 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -91,7 +91,7 @@ def test_backends_dict_from_pkg() -> None: entrypoints = [EntryPoint(name, value, group) for name, value, group in specs] engines = plugins.backends_dict_from_pkg(entrypoints) assert len(engines) == 2 - assert engines.keys() == set(("engine1", "engine2")) + assert engines.keys() == {"engine1", "engine2"} def test_set_missing_parameters() -> None: diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 3267af8b45b..3f69705e3f1 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -232,7 +232,7 @@ def __hash__(self): return hash(self.item) def __repr__(self): - return "{}(item={!r})".format(type(self).__name__, self.item) + return f"{type(self).__name__}(item={self.item!r})" item = HashableItemWrapper((1, 2, 3)) x = self.cls("x", [item]) diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py index 7f6eed55e9b..24907a158ef 100644 --- a/xarray/ufuncs.py +++ b/xarray/ufuncs.py @@ -53,9 +53,7 @@ def __call__(self, *args, **kwargs): new_args = args res = _UNDEFINED if len(args) > 2 or len(args) == 0: - raise TypeError( - "cannot handle {} arguments for {!r}".format(len(args), self._name) - ) + raise TypeError(f"cannot handle {len(args)} arguments for {self._name!r}") elif len(args) == 1: if isinstance(args[0], _xarray_types): res = args[0]._unary_op(self) From a4df90659dadd89cdaa068ff0b8ae80bd3e0a417 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 19 Jan 2022 18:51:10 -0700 Subject: [PATCH 06/68] Trigger CI on push or pull_request but not both (#5142) --- .github/workflows/ci-additional.yaml | 21 +++++++++++++++++--- .github/workflows/ci.yaml | 21 ++++++++++++++++++-- .github/workflows/publish-test-results.yaml | 22 ++++++++++++++++++++- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index fac4bb133b1..3fbe1e2f460 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -9,12 +9,26 @@ on: workflow_dispatch: # allows you to trigger manually jobs: - detect-ci-trigger: - name: detect ci trigger + skip-duplicate-jobs: runs-on: ubuntu-latest if: | github.repository == 'pydata/xarray' && (github.event_name == 'push' || github.event_name == 'pull_request') + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@v3.4.1 + with: + # For workflows which are triggered concurrently with the same + # contents, attempt to execute them exactly once. 
+ concurrent_skipping: 'same_content_newer' + paths_ignore: '["**/doc/**"]' + detect-ci-trigger: + name: detect ci trigger + runs-on: ubuntu-latest + needs: skip-duplicate-jobs + if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: @@ -115,7 +129,8 @@ jobs: doctest: name: Doctests runs-on: "ubuntu-latest" - if: github.repository == 'pydata/xarray' + needs: skip-duplicate-jobs + if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} defaults: run: shell: bash -l {0} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 447507ad25f..28e36c47e26 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,12 +9,27 @@ on: workflow_dispatch: # allows you to trigger manually jobs: - detect-ci-trigger: - name: detect ci trigger + skip-duplicate-jobs: runs-on: ubuntu-latest if: | github.repository == 'pydata/xarray' && (github.event_name == 'push' || github.event_name == 'pull_request') + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@v3.4.1 + with: + # For workflows which are triggered concurrently with the same + # contents, attempt to execute them exactly once. + concurrent_skipping: 'same_content_newer' + paths_ignore: '["**/doc/**"]' + + detect-ci-trigger: + name: detect ci trigger + needs: skip-duplicate-jobs + if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} + runs-on: ubuntu-latest outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: @@ -110,6 +125,8 @@ jobs: event_file: name: "Event File" + needs: skip-duplicate-jobs + if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} runs-on: ubuntu-latest steps: - name: Upload diff --git a/.github/workflows/publish-test-results.yaml b/.github/workflows/publish-test-results.yaml index a2e02c28f5a..ea429d360c5 100644 --- a/.github/workflows/publish-test-results.yaml +++ b/.github/workflows/publish-test-results.yaml @@ -9,10 +9,30 @@ on: - completed jobs: + + skip-duplicate-jobs: + runs-on: ubuntu-latest + if: | + github.repository == 'pydata/xarray' + && (github.event_name == 'push' || github.event_name == 'pull_request') + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@v3.4.1 + with: + # For workflows which are triggered concurrently with the same + # contents, attempt to execute them exactly once. 
+ concurrent_skipping: 'same_content_newer' + paths_ignore: '["**/doc/**"]' + publish-test-results: name: Publish test results runs-on: ubuntu-latest - if: github.event.workflow_run.conclusion != 'skipped' + needs: skip-duplicate-jobs + if: | + needs.skip-duplicate-jobs.outputs.should_skip != 'true' + && github.event.workflow_run.conclusion != 'skipped' steps: - name: Download and extract artifacts From 0ffb0f42282a1b67c4950e90e1e4ecd146307aa8 Mon Sep 17 00:00:00 2001 From: readthedocs-assistant <96542097+readthedocs-assistant@users.noreply.github.com> Date: Thu, 20 Jan 2022 02:51:35 +0100 Subject: [PATCH 07/68] Update Read the Docs configuration (automatic) (#6175) Co-authored-by: Anderson Banihirwe --- readthedocs.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/readthedocs.yml b/readthedocs.yml index 072a4b5110c..89266a10fc8 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,12 +1,10 @@ version: 2 - build: - image: latest - -conda: - environment: ci/requirements/doc.yml - + os: ubuntu-20.04 + tools: + python: mambaforge-4.10 sphinx: fail_on_warning: true - +conda: + environment: ci/requirements/doc.yml formats: [] From e512cf2f0b31cf9080b506cd5814ed0a5a185ce9 Mon Sep 17 00:00:00 2001 From: Abel Aoun Date: Fri, 21 Jan 2022 17:16:25 +0100 Subject: [PATCH 08/68] DOC: fix dead link to numpy gufunc docs page (#6182) --- doc/user-guide/computation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index f58767efb29..b0cc93ce282 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -761,7 +761,7 @@ any additional arguments: For using more complex operations that consider some array values collectively, it's important to understand the idea of "core dimensions" from NumPy's -`generalized ufuncs `_. Core dimensions are defined as dimensions +`generalized ufuncs `_. Core dimensions are defined as dimensions that should *not* be broadcast over. Usually, they correspond to the fundamental dimensions over which an operation is defined, e.g., the summed axis in ``np.sum``. 
A good clue that core dimensions are needed is the presence of an From 10855dc1a82350bfdbff5489f8b9a9764623bb5e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 21 Jan 2022 18:06:42 +0100 Subject: [PATCH 09/68] Add python 3.10 to CI (#5844) * Add python 3.10 to CI * test * test * [test-upstream] test * [test-upstream] test without numbaagg * test without rasterio * remove setuptools * Update environment.yml * pydap errors * test rasterio --- .github/workflows/ci.yaml | 2 +- .github/workflows/upstream-dev-ci.yaml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 28e36c47e26..74603d4398f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -53,7 +53,7 @@ jobs: matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] # Bookend python versions - python-version: ["3.8", "3.9"] + python-version: ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v2 with: diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 49415683d07..67415331bbd 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -44,7 +44,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9"] + python-version: ["3.10"] outputs: artifacts_availability: ${{ steps.status.outputs.ARTIFACTS_AVAILABLE }} steps: diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 8dafb6f80f6..05fa5fecba0 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -27,7 +27,7 @@ dependencies: - pip - pre-commit - pseudonetcdf - - pydap + # - pydap # https://github.com/pydap/pydap/pull/210 # - pynio # Not available on Windows - pytest - pytest-cov diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index eab06fbe0f8..46371247c4d 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -31,7 +31,7 @@ dependencies: - pooch - pre-commit - pseudonetcdf - - pydap + # - pydap # https://github.com/pydap/pydap/pull/210 # - pynio: not compatible with netCDF4>1.5.3; only tested in py37-bare-minimum - pytest - pytest-cov From c54123772817875678ec7ad769e6d4d6612aeb92 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 21 Jan 2022 19:00:51 +0100 Subject: [PATCH 10/68] remove no longer necessary version checks (#6177) --- xarray/backends/h5netcdf_.py | 8 +------- xarray/core/_reductions.py | 8 +------- xarray/core/dask_array_compat.py | 7 ++----- xarray/core/dataset.py | 7 +------ xarray/core/duck_array_ops.py | 20 +------------------- xarray/tests/test_accessor_dt.py | 7 +------ xarray/tests/test_cftime_offsets.py | 2 -- xarray/tests/test_dataarray.py | 17 +++++------------ xarray/tests/test_plugins.py | 12 ++---------- xarray/tests/test_sparse.py | 2 +- xarray/util/generate_reductions.py | 10 ++-------- 11 files changed, 17 insertions(+), 83 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index a52e539181f..671ea617fb6 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -159,13 +159,7 @@ def open( kwargs = {"invalid_netcdf": invalid_netcdf} if phony_dims is not None: - if Version(h5netcdf.__version__) >= Version("0.8.0"): - kwargs["phony_dims"] = phony_dims - else: - raise ValueError( - "h5netcdf backend 
keyword argument 'phony_dims' needs " - "h5netcdf >= 0.8.0." - ) + kwargs["phony_dims"] = phony_dims if Version(h5netcdf.__version__) >= Version("0.10.0") and Version( h5netcdf.core.h5py.__version__ ) >= Version("3.0.0"): diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 67fbbd482d0..83aaa10a20c 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,17 +1,11 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union from . import duck_array_ops from .types import T_DataArray, T_Dataset -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol - class DatasetReduce(Protocol): def reduce( diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index de8375bf721..0e0229cc3ca 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -179,8 +179,5 @@ def sliding_window_view(x, window_shape, axis=None): window_shape=window_shape, axis=axis, ) - # map_overlap's signature changed in https://github.com/dask/dask/pull/6165 - if dask_version > Version("2.18.0"): - return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) - else: - return map_overlap(x, _np_sliding_window_view, **kwargs) + + return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 26ef95f64f9..29e8de39f7a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -19,6 +19,7 @@ Hashable, Iterable, Iterator, + Literal, Mapping, MutableMapping, Sequence, @@ -101,12 +102,6 @@ broadcast_variables, ) -# TODO: Remove this check once python 3.7 is not supported: -if sys.version_info >= (3, 8): - from typing import Literal -else: - from typing_extensions import Literal - if TYPE_CHECKING: from ..backends import AbstractDataStore, ZarrStore from .dataarray import DataArray diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 5b0d9a4fcd4..b85d0e1645e 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -20,17 +20,10 @@ from numpy import stack as _stack from numpy import take, tensordot, transpose, unravel_index # noqa from numpy import where as _where -from packaging.version import Version from . import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import ( - cupy_array_type, - dask_array_type, - is_duck_dask_array, - sparse_array_type, - sparse_version, -) +from .pycompat import cupy_array_type, dask_array_type, is_duck_dask_array from .utils import is_duck_array try: @@ -174,17 +167,6 @@ def cumulative_trapezoid(y, x, axis): def astype(data, dtype, **kwargs): - if ( - isinstance(data, sparse_array_type) - and sparse_version < Version("0.11.0") - and "casting" in kwargs - ): - warnings.warn( - "The current version of sparse does not support the 'casting' argument. 
It will be ignored in the call to astype().", - RuntimeWarning, - stacklevel=4, - ) - kwargs.pop("casting") return data.astype(dtype, **kwargs) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index e9278f1e918..0cb11607435 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -402,8 +402,7 @@ def times_3d(times): "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) def test_field_access(data, field) -> None: - if field == "dayofyear" or field == "dayofweek": - pytest.importorskip("cftime", minversion="1.0.2.1") + result = getattr(data.time.dt, field) expected = xr.DataArray( getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), @@ -504,8 +503,6 @@ def test_cftime_strftime_access(data) -> None: def test_dask_field_access_1d(data, field) -> None: import dask.array as da - if field == "dayofyear" or field == "dayofweek": - pytest.importorskip("cftime", minversion="1.0.2.1") expected = xr.DataArray( getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), name=field, @@ -526,8 +523,6 @@ def test_dask_field_access_1d(data, field) -> None: def test_dask_field_access(times_3d, data, field) -> None: import dask.array as da - if field == "dayofyear" or field == "dayofweek": - pytest.importorskip("cftime", minversion="1.0.2.1") expected = xr.DataArray( getattr( xr.coding.cftimeindex.CFTimeIndex(times_3d.values.ravel()), field diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 061c1420aba..4f94b35e3c3 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1203,7 +1203,6 @@ def test_calendar_year_length(calendar, start, end, expected_number_of_days): @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofweek_after_cftime_range(freq): - pytest.importorskip("cftime", minversion="1.0.2.1") result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek np.testing.assert_array_equal(result, expected) @@ -1211,7 +1210,6 @@ def test_dayofweek_after_cftime_range(freq): @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofyear_after_cftime_range(freq): - pytest.importorskip("cftime", minversion="1.0.2.1") result = cftime_range("2000-02-01", periods=3, freq=freq).dayofyear expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofyear np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f1945b0e224..26c5459870d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2161,18 +2161,11 @@ def test_stack_unstack(self): # test GH3000 a = orig[:0, :1].stack(dim=("x", "y")).dim.to_index() - if pd.__version__ < "0.24.0": - b = pd.MultiIndex( - levels=[pd.Int64Index([]), pd.Int64Index([0])], - labels=[[], []], - names=["x", "y"], - ) - else: - b = pd.MultiIndex( - levels=[pd.Int64Index([]), pd.Int64Index([0])], - codes=[[], []], - names=["x", "y"], - ) + b = pd.MultiIndex( + levels=[pd.Int64Index([]), pd.Int64Index([0])], + codes=[[], []], + names=["x", "y"], + ) pd.testing.assert_index_equal(a, b) actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 472192d3a9e..218ed1ea2e5 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -1,19 +1,11 @@ -import sys +from importlib.metadata import EntryPoint from 
unittest import mock import pytest from xarray.backends import common, plugins -if sys.version_info >= (3, 8): - from importlib.metadata import EntryPoint - - importlib_metadata_mock = "importlib.metadata" -else: - # if the fallback library is missing, we are doomed. - from importlib_metadata import EntryPoint - - importlib_metadata_mock = "importlib_metadata" +importlib_metadata_mock = "importlib.metadata" class DummyBackendEntrypointArgs(common.BackendEntrypoint): diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index ad51534ddbf..651a0f64d2a 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -703,8 +703,8 @@ def test_dataset_repr(self): ) assert expected == repr(ds) + @requires_dask def test_sparse_dask_dataset_repr(self): - pytest.importorskip("dask", minversion="2.0") ds = xr.Dataset( data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))} ).chunk() diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 72449195d1e..70c92d1a96f 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -22,16 +22,10 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union from . import duck_array_ops -from .types import T_DataArray, T_Dataset - -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol''' +from .types import T_DataArray, T_Dataset''' OBJ_PREAMBLE = """ From 23faa50f8c5c75193657b130a6c9d506225af0de Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 26 Jan 2022 09:31:39 +0100 Subject: [PATCH 11/68] don't install bottleneck wheel for upstream CI (#6193) * use py3.9 for upstream CI (bottleneck issue) * don't install bottleneck upstream wheel [test-upstream] --- ci/install-upstream-wheels.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index b06eb1cc847..89cc81d0f3f 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -12,10 +12,10 @@ conda uninstall -y --force \ cftime \ rasterio \ pint \ - bottleneck \ sparse \ h5netcdf \ xarray + # bottleneck \ # re-enable again, see https://github.com/pydata/bottleneck/pull/378 # to limit the runtime of Upstream CI python -m pip install pytest-timeout python -m pip install \ @@ -41,8 +41,8 @@ python -m pip install \ git+https://github.com/Unidata/cftime \ git+https://github.com/mapbox/rasterio \ git+https://github.com/hgrecco/pint \ - git+https://github.com/pydata/bottleneck \ git+https://github.com/pydata/sparse \ git+https://github.com/intake/filesystem_spec \ git+https://github.com/SciTools/nc-time-axis \ git+https://github.com/h5netcdf/h5netcdf + # git+https://github.com/pydata/bottleneck \ # re-enable again, see https://github.com/pydata/bottleneck/pull/378 From 4ef41129a8aeb48942b48be22cd2ecca811e3c3e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 26 Jan 2022 20:04:02 +0100 Subject: [PATCH 12/68] Add seed kwarg to the tutorial scatter dataset (#6184) * Add seed kwarg to scatter example dataset * docstring * A little typing * mypy found issues * doc changes, list to dict * Update tutorial.py --- xarray/tutorial.py | 23 
++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index b0a3e110d84..d4c7e643bb9 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -226,16 +226,25 @@ def load_dataset(*args, **kwargs): return ds.load() -def scatter_example_dataset(): +def scatter_example_dataset(*, seed=None) -> Dataset: + """ + Create an example dataset. + + Parameters + ---------- + seed : int, optional + Seed for the random number generation. + """ + rng = np.random.default_rng(seed) A = DataArray( np.zeros([3, 11, 4, 4]), dims=["x", "y", "z", "w"], - coords=[ - np.arange(3), - np.linspace(0, 1, 11), - np.arange(4), - 0.1 * np.random.randn(4), - ], + coords={ + "x": np.arange(3), + "y": np.linspace(0, 1, 11), + "z": np.arange(4), + "w": 0.1 * rng.standard_normal(4), + }, ) B = 0.1 * A.x ** 2 + A.y ** 2.5 + 0.1 * A.z * A.w A = -0.1 * A.x + A.y / (5 + A.z) + A.w From 10bfa77425c459691abac26477e25c5681dc396f Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 26 Jan 2022 22:45:16 +0100 Subject: [PATCH 13/68] fix cftime doctests (#6192) --- xarray/coding/cftimeindex.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 9bb8da1568b..62d7116a658 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -537,10 +537,10 @@ def shift(self, n, freq): >>> index = xr.cftime_range("2000", periods=1, freq="M") >>> index CFTimeIndex([2000-01-31 00:00:00], - dtype='object', length=1, calendar='gregorian', freq=None) + dtype='object', length=1, calendar='standard', freq=None) >>> index.shift(1, "M") CFTimeIndex([2000-02-29 00:00:00], - dtype='object', length=1, calendar='gregorian', freq=None) + dtype='object', length=1, calendar='standard', freq=None) """ from .cftime_offsets import to_offset @@ -626,7 +626,7 @@ def to_datetimeindex(self, unsafe=False): >>> times = xr.cftime_range("2000", periods=2, calendar="gregorian") >>> times CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], - dtype='object', length=2, calendar='gregorian', freq=None) + dtype='object', length=2, calendar='standard', freq=None) >>> times.to_datetimeindex() DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None) """ From 4692c59679ae6cef370012aeb0f3d5f58efd3b4d Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 27 Jan 2022 22:06:39 +0100 Subject: [PATCH 14/68] MAINT: pandas 1.4: no longer use get_loc with method (#6195) --- xarray/core/indexes.py | 11 ++++++++--- xarray/core/missing.py | 5 ++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 844751f24bb..b66fbdf6504 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -231,9 +231,14 @@ def query(self, labels, method=None, tolerance=None): ) indexer = self.index.get_loc(label_value) else: - indexer = self.index.get_loc( - label_value, method=method, tolerance=tolerance - ) + if method is not None: + indexer = get_indexer_nd(self.index, label, method, tolerance) + if np.any(indexer < 0): + raise KeyError( + f"not all values found in index {coord_name!r}" + ) + else: + indexer = self.index.get_loc(label_value) elif label.dtype.kind == "b": indexer = label else: diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 2525272f719..f3bb5351db5 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -564,9 +564,8 @@ def _localize(var, indexes_coords): minval = np.nanmin(new_x.values) 
maxval = np.nanmax(new_x.values) index = x.to_index() - imin = index.get_loc(minval, method="nearest") - imax = index.get_loc(maxval, method="nearest") - + imin = index.get_indexer([minval], method="nearest").item() + imax = index.get_indexer([maxval], method="nearest").item() indexes[dim] = slice(max(imin - 2, 0), imax + 2) indexes_coords[dim] = (x[indexes[dim]], new_x) return var.isel(**indexes), indexes_coords From 9235548df71c1b03e76da40641dc790becf443bc Mon Sep 17 00:00:00 2001 From: Chris Roat <1053153+chrisroat@users.noreply.github.com> Date: Thu, 27 Jan 2022 13:46:58 -0800 Subject: [PATCH 15/68] Handle empty containers in zarr chunk checks (#5526) --- doc/whats-new.rst | 2 +- xarray/backends/zarr.py | 9 +++++---- xarray/tests/test_backends.py | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8896dd62379..50500e3d75f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -63,7 +63,7 @@ Bug fixes By `Michael Delgado `_. - `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. - +- Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain cirumstances (:pull:`5526`). By `Chris Roat `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 8bd343869ff..efb22bef1d4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -84,7 +84,8 @@ def __getitem__(self, key): def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): """ - Given encoding chunks (possibly None) and variable chunks (possibly None) + Given encoding chunks (possibly None or []) and variable chunks + (possibly None or []). """ # zarr chunk spec: @@ -93,7 +94,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are no chunks in encoding and the variable data is a numpy # array, then we let zarr use its own heuristics to pick the chunks - if var_chunks is None and enc_chunks is None: + if not var_chunks and not enc_chunks: return None # if there are no chunks in encoding but there are dask chunks, we try to @@ -102,7 +103,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks # while dask chunks can be variable sized # http://dask.pydata.org/en/latest/array-design.html#chunks - if var_chunks and enc_chunks is None: + if var_chunks and not enc_chunks: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( "Zarr requires uniform chunk sizes except for final chunk. " @@ -145,7 +146,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are chunks in encoding and the variable data is a numpy array, # we use the specified chunks - if var_chunks is None: + if not var_chunks: return enc_chunks_tuple # the hard case diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 356335f47e6..bea90e1fca9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2383,6 +2383,20 @@ def test_open_zarr_use_cftime(self): ds_b = xr.open_zarr(store_target, use_cftime=True) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) + def test_write_read_select_write(self): + # Test for https://github.com/pydata/xarray/issues/4084 + ds = create_test_data() + + # NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug. 
+ with self.create_zarr_target() as initial_store: + ds.to_zarr(initial_store, mode="w") + ds1 = xr.open_zarr(initial_store) + + # Combination of where+squeeze triggers error on write. + ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3") + with self.create_zarr_target() as final_store: + ds_sel.to_zarr(final_store, mode="w") + @requires_zarr class TestZarrDictStore(ZarrBase): From e50e575a798df5f6f98c647a16411a866401fe35 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 28 Jan 2022 06:41:49 +0100 Subject: [PATCH 16/68] doc: fix pd datetime parsing warning [skip-ci] (#6194) --- doc/user-guide/computation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index b0cc93ce282..cb6eadc8e63 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -364,7 +364,7 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python x = np.linspace(0, 10, 300) - t = pd.date_range("15/12/1999", periods=364) + t = pd.date_range("1999-12-15", periods=364) da = xr.DataArray( np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), dims=["time", "x"], From 8a885c1891f84536a3b4a15c03a61bfa132d180f Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 28 Jan 2022 14:16:06 -0800 Subject: [PATCH 17/68] Add release notes for v0.21.0 (#6203) * Add release notes for v0.21.0 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update doc/whats-new.rst Co-authored-by: Deepak Cherian Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- HOW_TO_RELEASE.md | 4 ++-- doc/whats-new.rst | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 16dc3b94196..33a4fb9bfc3 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -25,11 +25,11 @@ upstream https://github.com/pydata/xarray (push) ``` 3. Add a list of contributors with: ```sh - git log "$(git tag --sort="v:refname" | tail -1).." --format=%aN | sort -u | perl -pe 's/\n/$1, /' + git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | perl -pe 's/\n/$1, /' ``` This will return the number of contributors: ```sh - git log $(git tag --sort="v:refname" | tail -1).. --format=%aN | sort -u | wc -l + git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | wc -l ``` 4. Write a release summary: ~50 words describing the high level features. This will be used in the release emails, tweets, GitHub release notes, etc. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 50500e3d75f..ef4a6c75523 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,10 +14,19 @@ What's New np.random.seed(123456) -.. _whats-new.0.X.Y+1: +.. _whats-new.0.21.0: + +v0.21.0 (27 January 2022) +------------------------- + +Many thanks to the 20 contributors to the v0.21.0 release! + +Abel Aoun, Anderson Banihirwe, Ant Gib, Chris Roat, Cindy Chiao, +Deepak Cherian, Dominik Stańczak, Fabian Hofmann, Illviljan, Jody Klymak, Joseph +K Aicher, Mark Harfouche, Mathias Hauser, Matthew Roeschke, Maximilian Roos, +Michael Delgado, Pascal Bourgault, Pierre, Ray Bell, Romain Caneill, Tim Heap, +Tom Nicholas, Zeb Nicholls, joseph nowak, keewis. 
-v0.21.0 (unreleased) ---------------------- New Features ~~~~~~~~~~~~ @@ -65,10 +74,6 @@ Bug fixes By `Pierre Loicq `_. - Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain cirumstances (:pull:`5526`). By `Chris Roat `_. -Documentation -~~~~~~~~~~~~~ - - Internal Changes ~~~~~~~~~~~~~~~~ From 9b8cba40e23c22192a0a359fbd2c4f0279314af3 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 28 Jan 2022 15:24:59 -0800 Subject: [PATCH 18/68] Whatsnew template for 0.21.1 (#6205) --- doc/whats-new.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ef4a6c75523..a33f557a179 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,35 @@ What's New np.random.seed(123456) +.. _whats-new.0.21.1: + +v0.21.1 (unreleased) +-------------------- + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + .. _whats-new.0.21.0: v0.21.0 (27 January 2022) From 5470d933452d88deb17cc9294a164c4a03f55dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sat, 29 Jan 2022 08:55:01 +0100 Subject: [PATCH 19/68] MNT: prepare h5netcdf backend for (coming) change in dimension handling (#6200) * prepare h5netcdf backend for (coming) change in dimension handling * [test-upstream] --- xarray/backends/h5netcdf_.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 671ea617fb6..735aa5fc3bc 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -81,7 +81,11 @@ def _read_attributes(h5netcdf_var): _extract_h5nc_encoding = functools.partial( - _extract_nc4_variable_encoding, lsd_okay=False, h5py_okay=True, backend="h5netcdf" + _extract_nc4_variable_encoding, + lsd_okay=False, + h5py_okay=True, + backend="h5netcdf", + unlimited_dims=None, ) @@ -231,12 +235,24 @@ def get_attrs(self): return FrozenDict(_read_attributes(self.ds)) def get_dimensions(self): - return self.ds.dimensions + if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"): + return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) + else: + return self.ds.dimensions def get_encoding(self): - return { - "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None} - } + if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"): + return { + "unlimited_dims": { + k for k, v in self.ds.dimensions.items() if v.isunlimited() + } + } + else: + return { + "unlimited_dims": { + k for k, v in self.ds.dimensions.items() if v is None + } + } def set_dimension(self, name, length, is_unlimited=False): if is_unlimited: From aab856bf7429d6b0feae795b53fde5a551f184e0 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 30 Jan 2022 17:49:09 -0800 Subject: [PATCH 20/68] Xfail failing test (#6211) * Xfail failing test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * . 
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * if it breaks xfail it Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- xarray/tests/test_distributed.py | 2 ++ xarray/tests/test_variable.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index 92f39069aa3..88811c93776 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -184,6 +184,7 @@ def test_dask_distributed_cfgrib_integration_test(loop) -> None: assert_allclose(actual, expected) +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/pull/6211") @gen_cluster(client=True) async def test_async(c, s, a, b) -> None: x = create_test_data() @@ -216,6 +217,7 @@ def test_hdf5_lock() -> None: assert isinstance(HDF5_LOCK, dask.utils.SerializableLock) +@pytest.mark.xfail(reason="https://github.com/pydata/xarray/pull/6211") @gen_cluster(client=True) async def test_serializable_locks(c, s, a, b) -> None: def f(x, lock=None): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 3f69705e3f1..2e004ca0960 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2161,6 +2161,12 @@ def test_dask_rolling(self, dim, window, center): assert actual.shape == expected.shape assert_equal(actual, expected) + @pytest.mark.xfail( + reason="https://github.com/pydata/xarray/issues/6209#issuecomment-1025116203" + ) + def test_multiindex(self): + super().test_multiindex() + @requires_sparse class TestVariableWithSparse: From b09de8195a9e22dd35d1b7ed608ea15dad0806ef Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Mon, 31 Jan 2022 17:59:27 +0100 Subject: [PATCH 21/68] `GHA` `concurrency` (#6210) --- .github/workflows/cancel-duplicate-runs.yaml | 15 ------------ .github/workflows/ci-additional.yaml | 23 ++++--------------- .github/workflows/ci.yaml | 24 ++++---------------- .github/workflows/upstream-dev-ci.yaml | 4 ++++ 4 files changed, 12 insertions(+), 54 deletions(-) delete mode 100644 .github/workflows/cancel-duplicate-runs.yaml diff --git a/.github/workflows/cancel-duplicate-runs.yaml b/.github/workflows/cancel-duplicate-runs.yaml deleted file mode 100644 index 9f74360b034..00000000000 --- a/.github/workflows/cancel-duplicate-runs.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: Cancel -on: - workflow_run: - workflows: ["CI", "CI Additional", "CI Upstream"] - types: - - requested -jobs: - cancel: - name: Cancel previous runs - runs-on: ubuntu-latest - if: github.repository == 'pydata/xarray' - steps: - - uses: styfle/cancel-workflow-action@0.9.1 - with: - workflow_id: ${{ github.event.workflow.id }} diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 3fbe1e2f460..2be3577d883 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -8,27 +8,14 @@ on: - "*" workflow_dispatch: # allows you to trigger manually +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: - skip-duplicate-jobs: - runs-on: ubuntu-latest - if: | - github.repository == 'pydata/xarray' - && (github.event_name == 'push' || github.event_name == 'pull_request') - outputs: - should_skip: ${{ steps.skip_check.outputs.should_skip }} - steps: - - id: skip_check - uses: fkirc/skip-duplicate-actions@v3.4.1 - with: - # For workflows which are triggered concurrently with the same - # contents, attempt to 
execute them exactly once. - concurrent_skipping: 'same_content_newer' - paths_ignore: '["**/doc/**"]' detect-ci-trigger: name: detect ci trigger runs-on: ubuntu-latest - needs: skip-duplicate-jobs - if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: @@ -129,8 +116,6 @@ jobs: doctest: name: Doctests runs-on: "ubuntu-latest" - needs: skip-duplicate-jobs - if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} defaults: run: shell: bash -l {0} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 74603d4398f..c11842bbb04 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,27 +8,13 @@ on: - "*" workflow_dispatch: # allows you to trigger manually -jobs: - skip-duplicate-jobs: - runs-on: ubuntu-latest - if: | - github.repository == 'pydata/xarray' - && (github.event_name == 'push' || github.event_name == 'pull_request') - outputs: - should_skip: ${{ steps.skip_check.outputs.should_skip }} - steps: - - id: skip_check - uses: fkirc/skip-duplicate-actions@v3.4.1 - with: - # For workflows which are triggered concurrently with the same - # contents, attempt to execute them exactly once. - concurrent_skipping: 'same_content_newer' - paths_ignore: '["**/doc/**"]' +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +jobs: detect-ci-trigger: name: detect ci trigger - needs: skip-duplicate-jobs - if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} runs-on: ubuntu-latest outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} @@ -125,8 +111,6 @@ jobs: event_file: name: "Event File" - needs: skip-duplicate-jobs - if: ${{ needs.skip-duplicate-jobs.outputs.should_skip != 'true' }} runs-on: ubuntu-latest steps: - name: Upload diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 67415331bbd..f1ce442c623 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -10,6 +10,10 @@ on: - cron: "0 0 * * *" # Daily “At 00:00” UTC workflow_dispatch: # allows you to trigger the workflow run manually +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: detect-ci-trigger: name: detect upstream-dev ci trigger From 0a1e7e41ff18722444c77a6b70c48454923b9857 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 31 Jan 2022 19:08:42 +0100 Subject: [PATCH 22/68] [pre-commit.ci] pre-commit autoupdate (#6221) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/psf/black: 21.12b0 → 22.1.0](https://github.com/psf/black/compare/21.12b0...22.1.0) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- xarray/backends/common.py | 2 +- xarray/coding/cftime_offsets.py | 2 +- xarray/core/weighted.py | 2 +- xarray/tests/test_backends.py | 2 +- xarray/tests/test_computation.py | 13 +++++-------- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_interp.py | 6 +++--- xarray/tests/test_plot.py | 6 +++--- xarray/tests/test_units.py | 2 +- xarray/tests/test_variable.py | 10 +++++----- xarray/tests/test_weighted.py | 2 +- 
xarray/tutorial.py | 2 +- 14 files changed, 26 insertions(+), 29 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5eb2a244ee5..474916cf1a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,7 +26,7 @@ repos: - "--py37-plus" # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black - rev: 21.12b0 + rev: 22.1.0 hooks: - id: black - id: black-jupyter diff --git a/xarray/backends/common.py b/xarray/backends/common.py index f33a9ab2814..f659d71760b 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -65,7 +65,7 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 except catch: if n == max_retries: raise - base_delay = initial_delay * 2 ** n + base_delay = initial_delay * 2**n next_delay = base_delay + np.random.randint(base_delay) msg = ( f"getitem failed, waiting {next_delay} ms before trying again " diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 6557590dbb8..30bfd882b5c 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -671,7 +671,7 @@ def __apply__(self, other): _FREQUENCY_CONDITION = "|".join(_FREQUENCIES.keys()) -_PATTERN = fr"^((?P\d+)|())(?P({_FREQUENCY_CONDITION}))$" +_PATTERN = rf"^((?P\d+)|())(?P({_FREQUENCY_CONDITION}))$" # pandas defines these offsets as "Tick" objects, which for instance have diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 0676d351b6f..f34f993ef31 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -189,7 +189,7 @@ def _sum_of_squares( demeaned = da - da.weighted(self.weights).mean(dim=dim) - return self._reduce((demeaned ** 2), self.weights, dim=dim, skipna=skipna) + return self._reduce((demeaned**2), self.weights, dim=dim, skipna=skipna) def _weighted_sum( self, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bea90e1fca9..2e06ac25253 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1444,7 +1444,7 @@ def test_encoding_chunksizes_unlimited(self): "complevel": 0, "fletcher32": False, "contiguous": False, - "chunksizes": (2 ** 20,), + "chunksizes": (2**20,), "original_shape": (3,), } with self.roundtrip(ds) as actual: diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index a51bfb03641..de85c112048 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -475,13 +475,10 @@ def test_unified_dim_sizes() -> None: "x": 1, "y": 2, } - assert ( - unified_dim_sizes( - [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], - exclude_dims={"z"}, - ) - == {"x": 1, "y": 2} - ) + assert unified_dim_sizes( + [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], + exclude_dims={"z"}, + ) == {"x": 1, "y": 2} # duplicate dimensions with pytest.raises(ValueError): @@ -1947,7 +1944,7 @@ def test_polyval(use_dask, use_datetime) -> None: xcoord = xr.DataArray(x, dims=("x",), name="x") da = xr.DataArray( - np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + np.stack((1.0 + x + 2.0 * x**2, 1.0 + 2.0 * x + 3.0 * x**2)), dims=("d", "x"), coords={"x": xcoord, "d": [0, 1]}, ) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 26c5459870d..61bd19f1515 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3727,7 +3727,7 @@ def test_polyfit(self, use_dask, use_datetime): da_raw = 
DataArray( np.stack( - (10 + 1e-15 * x + 2e-28 * x ** 2, 30 + 2e-14 * x + 1e-29 * x ** 2) + (10 + 1e-15 * x + 2e-28 * x**2, 30 + 2e-14 * x + 1e-29 * x**2) ), dims=("d", "x"), coords={"x": xcoord, "d": [0, 1]}, diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c0c1f2224cf..3418c2e55e6 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3644,7 +3644,7 @@ def test_assign(self): assert list(actual.variables) == ["x", "y"] assert_identical(ds, Dataset()) - actual = actual.assign(y=lambda ds: ds.x ** 2) + actual = actual.assign(y=lambda ds: ds.x**2) expected = Dataset({"y": ("x", [0, 1, 4]), "x": [0, 1, 2]}) assert_identical(actual, expected) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index fd480436889..f6d8a7cfcb0 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -30,7 +30,7 @@ def get_example_data(case): data = xr.DataArray( np.sin(x[:, np.newaxis]) * np.cos(y), dims=["x", "y"], - coords={"x": x, "y": y, "x2": ("x", x ** 2)}, + coords={"x": x, "y": y, "x2": ("x", x**2)}, ) if case == 0: @@ -46,7 +46,7 @@ def get_example_data(case): return xr.DataArray( np.sin(x[:, np.newaxis, np.newaxis]) * np.cos(y[:, np.newaxis]) * z, dims=["x", "y", "z"], - coords={"x": x, "y": y, "x2": ("x", x ** 2), "z": z}, + coords={"x": x, "y": y, "x2": ("x", x**2), "z": z}, ) elif case == 4: return get_example_data(3).chunk({"z": 5}) @@ -440,7 +440,7 @@ def test_sorted(): da = xr.DataArray( np.cos(x[:, np.newaxis, np.newaxis]) * np.cos(y[:, np.newaxis]) * z, dims=["x", "y", "z"], - coords={"x": x, "y": y, "x2": ("x", x ** 2), "z": z}, + coords={"x": x, "y": y, "x2": ("x", x**2), "z": z}, ) x_new = np.linspace(0, 1, 30) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 3088b7e109c..c0b6d355441 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -550,7 +550,7 @@ def test_geo_data(self): [-137.85, -120.99, -103.28, -85.28, -67.62], ] ) - data = np.sqrt(lon ** 2 + lat ** 2) + data = np.sqrt(lon**2 + lat**2) da = DataArray( data, dims=("y", "x"), @@ -2886,8 +2886,8 @@ def test_plot_transposes_properly(plotfunc): def test_facetgrid_single_contour(): # regression test for GH3569 x, y = np.meshgrid(np.arange(12), np.arange(12)) - z = xr.DataArray(np.sqrt(x ** 2 + y ** 2)) - z2 = xr.DataArray(np.sqrt(x ** 2 + y ** 2) + 1) + z = xr.DataArray(np.sqrt(x**2 + y**2)) + z2 = xr.DataArray(np.sqrt(x**2 + y**2) + 1) ds = xr.concat([z, z2], dim="time") ds["time"] = [0, 1] diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 1225ecde5fb..a083c50c3d1 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -5459,7 +5459,7 @@ def test_grouped_operations(self, func, variant, dtype): def test_content_manipulation(self, func, variant, dtype): variants = { "data": ( - (unit_registry.m ** 3, unit_registry.Pa, unit_registry.degK), + (unit_registry.m**3, unit_registry.Pa, unit_registry.degK), 1, 1, ), diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 2e004ca0960..f98aaaa5257 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -343,8 +343,8 @@ def test_1d_math(self): assert_identical(base_v, 0 + v) assert_identical(base_v, v * 1) # binary ops with numpy arrays - assert_array_equal((v * x).values, x ** 2) - assert_array_equal((x * v).values, x ** 2) + assert_array_equal((v * x).values, x**2) + assert_array_equal((x * v).values, x**2) assert_array_equal(v - y, v - 1) assert_array_equal(y - v, 
1 - v) # verify attributes are dropped @@ -358,7 +358,7 @@ def test_1d_math(self): assert_array_equal((v * w).values, x * y) # something complicated - assert_array_equal((v ** 2 * w - 1 + x).values, x ** 2 * y - 1 + x) + assert_array_equal((v**2 * w - 1 + x).values, x**2 * y - 1 + x) # make sure dtype is preserved (for Index objects) assert float == (+v).dtype assert float == (+v).values.dtype @@ -1019,7 +1019,7 @@ def test_datetime64_conversion_scalar(self): assert v.values.dtype == np.dtype("datetime64[ns]") def test_timedelta64_conversion_scalar(self): - expected = np.timedelta64(24 * 60 * 60 * 10 ** 9, "ns") + expected = np.timedelta64(24 * 60 * 60 * 10**9, "ns") for values in [ np.timedelta64(1, "D"), pd.Timedelta("1 day"), @@ -1048,7 +1048,7 @@ def test_0d_timedelta(self): for td in [pd.to_timedelta("1s"), np.timedelta64(1, "s")]: v = Variable([], td) assert v.dtype == np.dtype("timedelta64[ns]") - assert v.values == np.timedelta64(10 ** 9, "ns") + assert v.values == np.timedelta64(10**9, "ns") def test_equals_and_identical(self): d = np.random.rand(10, 3) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 36923ed49c3..fb057057d2d 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -393,7 +393,7 @@ def expected_weighted(da, weights, dim, skipna, operation): return weighted_mean demeaned = da - weighted_mean - sum_of_squares = ((demeaned ** 2) * weights).sum(dim=dim, skipna=skipna) + sum_of_squares = ((demeaned**2) * weights).sum(dim=dim, skipna=skipna) if operation == "sum_of_squares": return sum_of_squares diff --git a/xarray/tutorial.py b/xarray/tutorial.py index d4c7e643bb9..d9ff3b1492d 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -246,7 +246,7 @@ def scatter_example_dataset(*, seed=None) -> Dataset: "w": 0.1 * rng.standard_normal(4), }, ) - B = 0.1 * A.x ** 2 + A.y ** 2.5 + 0.1 * A.z * A.w + B = 0.1 * A.x**2 + A.y**2.5 + 0.1 * A.z * A.w A = -0.1 * A.x + A.y / (5 + A.z) + A.w ds = Dataset({"A": A, "B": B}) ds["w"] = ["one", "two", "three", "five"] From e8d42394b0b4a887ca7246aaae55f9347076f430 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 31 Jan 2022 10:35:26 -0800 Subject: [PATCH 23/68] update release steps (#6214) - no more stable branch - switch to calver --- HOW_TO_RELEASE.md | 62 ++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 44 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 33a4fb9bfc3..893a6d77168 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -18,12 +18,7 @@ upstream https://github.com/pydata/xarray (push) git switch main git pull upstream main ``` - 2. Confirm there are no commits on stable that are not yet merged - ([ref](https://github.com/pydata/xarray/pull/4440)): - ```sh - git merge upstream/stable - ``` - 3. Add a list of contributors with: + 2. Add a list of contributors with: ```sh git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | perl -pe 's/\n/$1, /' ``` @@ -31,52 +26,39 @@ upstream https://github.com/pydata/xarray (push) ```sh git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | wc -l ``` - 4. Write a release summary: ~50 words describing the high level features. This + 3. Write a release summary: ~50 words describing the high level features. This will be used in the release emails, tweets, GitHub release notes, etc. - 5. Look over whats-new.rst and the docs. Make sure "What's New" is complete + 4. Look over whats-new.rst and the docs. 
Make sure "What's New" is complete (check the date!) and add the release summary at the top. Things to watch out for: - Important new features should be highlighted towards the top. - Function/method references should include links to the API docs. - Sometimes notes get added in the wrong section of whats-new, typically due to a bad merge. Check for these before a release by using git diff, - e.g., `git diff v{0.X.Y-1} whats-new.rst` where {0.X.Y-1} is the previous + e.g., `git diff v{YYYY.MM.X-1} whats-new.rst` where {YYYY.MM.X-1} is the previous release. - 6. Open a PR with the release summary and whatsnew changes; in particular the + 5. Open a PR with the release summary and whatsnew changes; in particular the release headline should get feedback from the team on what's important to include. - 7. After merging, again ensure your main branch is synced to upstream: + 6. After merging, again ensure your main branch is synced to upstream: ```sh git pull upstream main ``` - 8. If you have any doubts, run the full test suite one final time! + 7. If you have any doubts, run the full test suite one final time! ```sh pytest ``` - 9. Check that the ReadTheDocs build is passing. -10. Issue the release on GitHub. Click on "Draft a new release" at + 8. Check that the ReadTheDocs build is passing on the `main` branch. + 9. Issue the release on GitHub. Click on "Draft a new release" at . Type in the version number (with a "v") and paste the release summary in the notes. -11. This should automatically trigger an upload of the new build to PyPI via GitHub Actions. + 10. This should automatically trigger an upload of the new build to PyPI via GitHub Actions. Check this has run [here](https://github.com/pydata/xarray/actions/workflows/pypi-release.yaml), and that the version number you expect is displayed [on PyPI](https://pypi.org/project/xarray/) -12. Update the stable branch (used by ReadTheDocs) and switch back to main: - ```sh - git switch stable - git rebase main - git push --force upstream stable - git switch main - ``` - You may need to first fetch it with `git fetch upstream`, - and check out a local version with `git checkout -b stable upstream/stable`. - - It's OK to force push to `stable` if necessary. (We also update the stable - branch with `git cherry-pick` for documentation only fixes that apply the - current released version.) -13. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst: +11. Add a section for the next release {YYYY.MM.X+1} to doc/whats-new.rst: ```rst - .. _whats-new.0.X.Y+1: + .. _whats-new.YYYY.MM.X+1: - v0.X.Y+1 (unreleased) + vYYYY.MM.X+1 (unreleased) --------------------- New Features @@ -103,17 +85,14 @@ upstream https://github.com/pydata/xarray (push) ~~~~~~~~~~~~~~~~ ``` -14. Commit your changes and push to main again: +12. Commit your changes and push to main again: ```sh git commit -am 'New whatsnew section' git push upstream main ``` You're done pushing to main! -15. Update the docs. Login to - and switch your new release tag (at the bottom) from "Inactive" to "Active". - It should now build automatically. -16. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I +13. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I usually only email xarray@googlegroups.com. 
For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com @@ -130,11 +109,6 @@ upstream https://github.com/pydata/xarray (push) ## Note on version numbering -We follow a rough approximation of semantic version. Only major releases (0.X.0) -should include breaking changes. Minor releases (0.X.Y) are for bug fixes and -backwards compatible new features, but if a sufficient number of new features -have arrived we will issue a major release even if there are no compatibility -breaks. - -Once the project reaches a sufficient level of maturity for a 1.0.0 release, we -intend to follow semantic versioning more strictly. +As of 2022.02.0, we utilize the [CALVER](https://calver.org/) version system. +Specifically, we have adopted the pattern `YYYY.MM.X`, where `YYYY` is a 4-digit +year (e.g. `2022`), `MM` is a 2-digit zero-padded month (e.g. `01` for January), and `X` is the release number (starting at zero at the start of each month and incremented once for each additional release). From e939bfcf883ad9e609e9891b3e0bbeef384c0cc5 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Tue, 1 Feb 2022 00:16:20 +0100 Subject: [PATCH 24/68] `GHA` `concurrency` followup (#6223) * Update ci.yaml * Update ci-additional.yaml --- .github/workflows/ci-additional.yaml | 3 +++ .github/workflows/ci.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 2be3577d883..b476c224df6 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -16,6 +16,9 @@ jobs: detect-ci-trigger: name: detect ci trigger runs-on: ubuntu-latest + if: | + github.repository == 'pydata/xarray' + && (github.event_name == 'push' || github.event_name == 'pull_request') outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c11842bbb04..4f6cbbc3871 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,6 +16,9 @@ jobs: detect-ci-trigger: name: detect ci trigger runs-on: ubuntu-latest + if: | + github.repository == 'pydata/xarray' + && (github.event_name == 'push' || github.event_name == 'pull_request') outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: From 5973ef380d244931f0b5d2d6bbf762ea7b0a0d44 Mon Sep 17 00:00:00 2001 From: Sebastian Weigand Date: Tue, 1 Feb 2022 00:51:59 +0100 Subject: [PATCH 25/68] Fix missing dependecy definition of 'packaging' (#6207) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🩹 Added packaging to install_requires * add packaging dep to ci, install instructions, and whatsnew page * lint * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * uninstall packaging before installing upstream * update requirements.txt * update whats new and rerun linter * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: Joseph Hamman Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .binder/environment.yml | 1 + ci/install-upstream-wheels.sh | 2 ++ ci/requirements/doc.yml | 1 + ci/requirements/environment-windows.yml | 1 + ci/requirements/environment.yml | 1 + ci/requirements/py38-bare-minimum.yml | 1 + ci/requirements/py38-min-all-deps.yml | 
1 + ci/requirements/py39-all-but-dask.yml | 1 + doc/getting-started-guide/installing.rst | 1 + doc/whats-new.rst | 3 ++- requirements.txt | 1 + setup.cfg | 1 + 12 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.binder/environment.yml b/.binder/environment.yml index 6caea42df87..99a7d9f2494 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -23,6 +23,7 @@ dependencies: - netcdf4 - numba - numpy + - packaging - pandas - pint - pip diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index 89cc81d0f3f..86c661be56b 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -11,6 +11,7 @@ conda uninstall -y --force \ zarr \ cftime \ rasterio \ + packaging \ pint \ sparse \ h5netcdf \ @@ -40,6 +41,7 @@ python -m pip install \ git+https://github.com/zarr-developers/zarr \ git+https://github.com/Unidata/cftime \ git+https://github.com/mapbox/rasterio \ + git+https://github.com/pypa/packaging \ git+https://github.com/hgrecco/pint \ git+https://github.com/pydata/sparse \ git+https://github.com/intake/filesystem_spec \ diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index a27a26d5cac..e5fcc500f70 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -19,6 +19,7 @@ dependencies: - netcdf4>=1.5 - numba - numpy>=1.17 + - packaging>=20.0 - pandas>=1.0 - pooch - pip diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 05fa5fecba0..6c389c22ce6 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -22,6 +22,7 @@ dependencies: - netcdf4 - numba - numpy + - packaging - pandas - pint - pip diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 46371247c4d..516c964afc7 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -25,6 +25,7 @@ dependencies: - numba - numexpr - numpy + - packaging - pandas - pint - pip diff --git a/ci/requirements/py38-bare-minimum.yml b/ci/requirements/py38-bare-minimum.yml index c6e3ac504a8..5986ec7186b 100644 --- a/ci/requirements/py38-bare-minimum.yml +++ b/ci/requirements/py38-bare-minimum.yml @@ -11,4 +11,5 @@ dependencies: - pytest-env - pytest-xdist - numpy=1.18 + - packaging=20.0 - pandas=1.1 diff --git a/ci/requirements/py38-min-all-deps.yml b/ci/requirements/py38-min-all-deps.yml index a6459b92ccb..76e2b28093d 100644 --- a/ci/requirements/py38-min-all-deps.yml +++ b/ci/requirements/py38-min-all-deps.yml @@ -33,6 +33,7 @@ dependencies: - netcdf4=1.5.3 - numba=0.51 - numpy=1.18 + - packaging=20.0 - pandas=1.1 - pint=0.16 - pip diff --git a/ci/requirements/py39-all-but-dask.yml b/ci/requirements/py39-all-but-dask.yml index 21217e79c7c..9c42d5f3b73 100644 --- a/ci/requirements/py39-all-but-dask.yml +++ b/ci/requirements/py39-all-but-dask.yml @@ -23,6 +23,7 @@ dependencies: - netcdf4 - numba - numpy + - packaging - pandas - pint - pip diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index 6f437a2dc4c..c14e7d36579 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -8,6 +8,7 @@ Required dependencies - Python (3.8 or later) - `numpy `__ (1.18 or later) +- `packaging `__ (20.0 or later) - `pandas `__ (1.1 or later) .. 
_optional-dependencies: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a33f557a179..520be67211d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,8 @@ Deprecations Bug fixes ~~~~~~~~~ - +- Add `packaging` as a dependency to Xarray (:issue:`6216`, :pull:`6207`). + By `Sebastian Weigand `_ and `Joe Hamman `_. Documentation ~~~~~~~~~~~~~ diff --git a/requirements.txt b/requirements.txt index 729a3655125..37417908cf4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ # https://help.github.com/en/github/visualizing-repository-data-with-graphs/listing-the-packages-that-a-repository-depends-on numpy >= 1.18 +packaging >= 20.0 pandas >= 1.1 diff --git a/setup.cfg b/setup.cfg index f9e0afa6445..f9f8ae5c4dc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -77,6 +77,7 @@ python_requires = >=3.8 install_requires = numpy >= 1.18 pandas >= 1.1 + packaging >= 20.0 [options.extras_require] io = From fe491b14b113c185b5b9a18e4f643e5a73208629 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 31 Jan 2022 21:37:17 -0800 Subject: [PATCH 26/68] Add release notes for 0.21.1 --- doc/whats-new.rst | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 520be67211d..58f043f1a30 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,33 +16,16 @@ What's New .. _whats-new.0.21.1: -v0.21.1 (unreleased) --------------------- - -New Features -~~~~~~~~~~~~ - - -Breaking changes -~~~~~~~~~~~~~~~~ - - -Deprecations -~~~~~~~~~~~~ +v0.21.1 (31 January 2022) +------------------------- +This is a bugfix release to resolve (:issue:`6216`, :pull:`6207`). Bug fixes ~~~~~~~~~ - Add `packaging` as a dependency to Xarray (:issue:`6216`, :pull:`6207`). By `Sebastian Weigand `_ and `Joe Hamman `_. -Documentation -~~~~~~~~~~~~~ - - -Internal Changes -~~~~~~~~~~~~~~~~ - .. _whats-new.0.21.0: From 0c2b6e9c8f9b749a43bd0166c5f7f79b78b6dacb Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 1 Feb 2022 01:39:10 -0800 Subject: [PATCH 27/68] Add whatsnew section for v2022.02.0 (#6225) --- doc/whats-new.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 58f043f1a30..93271cca3e4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,35 @@ What's New np.random.seed(123456) +.. _whats-new.2022.02.0: + +v2022.02.0 (unreleased) +----------------------- + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + .. 
_whats-new.0.21.1: v0.21.1 (31 January 2022) From 86328a1cfe5296f8e478b17e52ba884db2384872 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 1 Feb 2022 10:40:11 +0100 Subject: [PATCH 28/68] fix or suppress test warnings (#6213) * fix & suppress test warnings * fix * for mpl3.5 * for older pandas * another one * move warning filter * [test-upstream] * two more --- xarray/backends/plugins.py | 9 +-------- xarray/coding/cftimeindex.py | 2 +- xarray/core/common.py | 6 +++--- xarray/core/groupby.py | 4 ++-- xarray/core/missing.py | 2 +- xarray/plot/utils.py | 3 ++- xarray/tests/test_accessor_dt.py | 4 ++-- xarray/tests/test_backends.py | 6 +++++- xarray/tests/test_coarsen.py | 2 +- xarray/tests/test_computation.py | 1 + xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_distributed.py | 1 + xarray/tests/test_groupby.py | 6 +++--- xarray/tests/test_sparse.py | 2 +- xarray/tests/test_variable.py | 16 ++++++++++++---- xarray/tests/test_weighted.py | 2 ++ 16 files changed, 39 insertions(+), 29 deletions(-) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index f03782321e7..a45ee78efd0 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -1,18 +1,11 @@ import functools import inspect import itertools -import sys import warnings +from importlib.metadata import entry_points from .common import BACKEND_ENTRYPOINTS, BackendEntrypoint -if sys.version_info >= (3, 8): - from importlib.metadata import entry_points -else: - # if the fallback library is missing, we are doomed. - from importlib_metadata import entry_points - - STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"] diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 62d7116a658..ac6904d4e31 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -407,7 +407,7 @@ def _partial_date_slice(self, resolution, parsed): times = self._data - if self.is_monotonic: + if self.is_monotonic_increasing: if len(times) and ( (start < times[0] and end < times[0]) or (start > times[-1] and end > times[-1]) diff --git a/xarray/core/common.py b/xarray/core/common.py index 039b03aec56..bee59c6cc7d 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -853,7 +853,7 @@ def rolling( ... np.linspace(0, 11, num=12), ... coords=[ ... pd.date_range( - ... "15/12/1999", + ... "1999-12-15", ... periods=12, ... freq=pd.DateOffset(months=1), ... ) @@ -966,7 +966,7 @@ def coarsen( >>> da = xr.DataArray( ... np.linspace(0, 364, num=364), ... dims="time", - ... coords={"time": pd.date_range("15/12/1999", periods=364)}, + ... coords={"time": pd.date_range("1999-12-15", periods=364)}, ... ) >>> da # +doctest: ELLIPSIS @@ -1062,7 +1062,7 @@ def resample( ... np.linspace(0, 11, num=12), ... coords=[ ... pd.date_range( - ... "15/12/1999", + ... "1999-12-15", ... periods=12, ... freq=pd.DateOffset(months=1), ... 
) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 185b4ae5bec..e9f485a2393 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -201,7 +201,7 @@ def _unique_and_monotonic(group): if isinstance(group, _DummyGroup): return True index = safe_cast_to_index(group) - return index.is_unique and index.is_monotonic + return index.is_unique and index.is_monotonic_increasing def _apply_loffset(grouper, result): @@ -343,7 +343,7 @@ def __init__( if grouper is not None: index = safe_cast_to_index(group) - if not index.is_monotonic: + if not index.is_monotonic_increasing: # TODO: sort instead of raising an error raise ValueError("index must be monotonic for resampling") full_index, first_items = self._get_index_and_items(index, grouper) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index f3bb5351db5..39e7730dd58 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -266,7 +266,7 @@ def get_clean_interp_index( index.name = dim if strict: - if not index.is_monotonic: + if not index.is_monotonic_increasing: raise ValueError(f"Index {index.name!r} must be monotonically increasing") if not index.is_unique: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 3b2a133b3e5..f09d1eb1853 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -1036,7 +1036,8 @@ def _get_color_and_size(value): if label_values_are_numeric: label_values_min = label_values.min() label_values_max = label_values.max() - fmt.set_bounds(label_values_min, label_values_max) + fmt.axis.set_view_interval(label_values_min, label_values_max) + fmt.axis.set_data_interval(label_values_min, label_values_max) if num is not None: # Labels are numerical but larger than the target diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 0cb11607435..92a8b8a0e39 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -97,7 +97,7 @@ def test_field_access(self, field) -> None: def test_isocalendar(self, field, pandas_field) -> None: # pandas isocalendar has dtypy UInt32Dtype, convert to Int64 - expected = pd.Int64Index(getattr(self.times.isocalendar(), pandas_field)) + expected = pd.Index(getattr(self.times.isocalendar(), pandas_field).astype(int)) expected = xr.DataArray( expected, name=field, coords=[self.times], dims=["time"] ) @@ -435,7 +435,7 @@ def test_calendar_dask() -> None: # 3D lazy dask - np data = xr.DataArray( - da.random.random_integers(1, 1000000, size=(4, 5, 6)).astype(" None: da = xr.DataArray( np.linspace(0, 365, num=364), dims="time", - coords={"time": pd.date_range("15/12/1999", periods=364)}, + coords={"time": pd.date_range("1999-12-15", periods=364)}, ) actual = da.coarsen(time=2).mean() diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index de85c112048..7f601c6195a 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1554,6 +1554,7 @@ def test_covcorr_consistency(da_a, da_b, dim) -> None: @requires_dask @pytest.mark.parametrize("da_a, da_b", arrays_w_tuples()[1]) @pytest.mark.parametrize("dim", [None, "time", "x"]) +@pytest.mark.filterwarnings("ignore:invalid value encountered in .*divide") def test_corr_lazycorr_consistency(da_a, da_b, dim) -> None: da_al = da_a.chunk() da_bl = da_b.chunk() diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 61bd19f1515..a74556f882f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2162,7 +2162,7 @@ def 
test_stack_unstack(self): # test GH3000 a = orig[:0, :1].stack(dim=("x", "y")).dim.to_index() b = pd.MultiIndex( - levels=[pd.Int64Index([]), pd.Int64Index([0])], + levels=[pd.Index([], np.int64), pd.Index([0], np.int64)], codes=[[], []], names=["x", "y"], ) diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index 88811c93776..f70e1c7958e 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -160,6 +160,7 @@ def test_dask_distributed_zarr_integration_test(loop, consolidated, compute) -> @requires_rasterio +@pytest.mark.filterwarnings("ignore:deallocating CachingFileManager") def test_dask_distributed_rasterio_integration_test(loop) -> None: with create_tmp_geotiff() as (tmp_file, expected): with cluster() as (s, [a, b]): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 3a9e34d1e2d..fffc0c3708a 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -765,7 +765,7 @@ def test_groupby_math_nD_group() -> None: g = da.groupby_bins("num2d", bins=[0, 4, 6]) mean = g.mean() idxr = np.digitize(da.num2d, bins=(0, 4, 6), right=True)[:30, :] - 1 - expanded_mean = mean.drop("num2d_bins").isel(num2d_bins=(("x", "y"), idxr)) + expanded_mean = mean.drop_vars("num2d_bins").isel(num2d_bins=(("x", "y"), idxr)) expected = da.isel(x=slice(30)) - expanded_mean expected["labels"] = expected.labels.broadcast_like(expected.labels2d) expected["num"] = expected.num.broadcast_like(expected.num2d) @@ -1251,7 +1251,7 @@ def test_groupby_bins_sort(self): np.arange(100), dims="x", coords={"x": np.linspace(-100, 100, num=100)} ) binned_mean = data.groupby_bins("x", bins=11).mean() - assert binned_mean.to_index().is_monotonic + assert binned_mean.to_index().is_monotonic_increasing def test_groupby_assign_coords(self): @@ -1426,7 +1426,7 @@ def test_upsample(self): # Pad actual = array.resample(time="3H").pad() - expected = DataArray(array.to_series().resample("3H").pad()) + expected = DataArray(array.to_series().resample("3H").ffill()) assert_identical(expected, actual) # Nearest diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 651a0f64d2a..bf4d39105c4 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -800,7 +800,7 @@ def test_resample(self): t1 = xr.DataArray( np.linspace(0, 11, num=12), coords=[ - pd.date_range("15/12/1999", periods=12, freq=pd.DateOffset(months=1)) + pd.date_range("1999-12-15", periods=12, freq=pd.DateOffset(months=1)) ], dims="time", ) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index f98aaaa5257..4170267b89c 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2613,10 +2613,14 @@ def test_from_dask(self, Var): @requires_pint def test_from_pint(self, Var): - from pint import Quantity + import pint arr = np.array([1, 2, 3]) - v = Var("x", Quantity(arr, units="m")) + + # IndexVariable strips the unit + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=pint.UnitStrippedWarning) + v = Var("x", pint.Quantity(arr, units="m")) assert_identical(v.as_numpy(), Var("x", arr)) np.testing.assert_equal(v.to_numpy(), arr) @@ -2649,11 +2653,15 @@ def test_from_cupy(self, Var): @requires_pint def test_from_pint_wrapping_dask(self, Var): import dask - from pint import Quantity + import pint arr = np.array([1, 2, 3]) d = dask.array.from_array(np.array([1, 2, 3])) - v = Var("x", Quantity(d, units="m")) + + # IndexVariable strips the unit + with 
warnings.catch_warnings(): + warnings.simplefilter("ignore", category=pint.UnitStrippedWarning) + v = Var("x", pint.Quantity(d, units="m")) result = v.as_numpy() assert_identical(result, Var("x", arr)) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index fb057057d2d..1f065228bc4 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -443,6 +443,7 @@ def check_weighted_operations(data, weights, dim, skipna): @pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None)) @pytest.mark.parametrize("add_nans", (True, False)) @pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.filterwarnings("ignore:invalid value encountered in sqrt") def test_weighted_operations_3D(dim, add_nans, skipna): dims = ("a", "b", "c") @@ -480,6 +481,7 @@ def test_weighted_operations_nonequal_coords(): @pytest.mark.parametrize("shape_weights", ((4,), (4, 4), (4, 4, 4))) @pytest.mark.parametrize("add_nans", (True, False)) @pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.filterwarnings("ignore:invalid value encountered in sqrt") def test_weighted_operations_different_shapes( shape_data, shape_weights, add_nans, skipna ): From 33067cd24f66d4855babaa6801b009480c4e2cb2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 3 Feb 2022 08:41:17 -0700 Subject: [PATCH 29/68] Verify built dist/wheel (#6224) Co-authored-by: keewis --- .github/workflows/pypi-release.yaml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 7bc35952729..f09291b9c6e 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -62,6 +62,14 @@ jobs: run: | ls -ltrh ls -ltrh dist + + - name: Verify the built dist/wheel is valid + if: github.event_name == 'push' + run: | + python -m pip install --upgrade pip + python -m pip install dist/xarray*.whl + python -m xarray.util.print_versions + - name: Publish package to TestPyPI if: github.event_name == 'push' uses: pypa/gh-action-pypi-publish@v1.5.0 @@ -71,13 +79,6 @@ jobs: repository_url: https://test.pypi.org/legacy/ verbose: true - - name: Check uploaded package - if: github.event_name == 'push' - run: | - sleep 3 - python -m pip install --upgrade pip - python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade xarray - python -m xarray.util.print_versions upload-to-pypi: needs: test-built-dist From 56122ef34b10d1e586e3ed324daf4df38e6dee12 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 5 Feb 2022 22:29:36 +0100 Subject: [PATCH 30/68] Run pyupgrade on core/utils (#6240) * Run pyupgrade on core/utils * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- xarray/core/utils.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index a9ea0acb267..da3a196a621 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1,4 +1,6 @@ """Internal utilities; not for external use""" +from __future__ import annotations + import contextlib import functools import io @@ -14,18 +16,14 @@ Callable, Collection, Container, - Dict, Hashable, Iterable, Iterator, 
Mapping, MutableMapping, MutableSet, - Optional, Sequence, - Tuple, TypeVar, - Union, cast, ) @@ -188,7 +186,7 @@ def list_equiv(first, second): return equiv -def peek_at(iterable: Iterable[T]) -> Tuple[T, Iterator[T]]: +def peek_at(iterable: Iterable[T]) -> tuple[T, Iterator[T]]: """Returns the first value from iterable, as well as a new iterator with the same content as the original iterable """ @@ -273,7 +271,7 @@ def is_duck_array(value: Any) -> bool: def either_dict_or_kwargs( - pos_kwargs: Optional[Mapping[Any, T]], + pos_kwargs: Mapping[Any, T] | None, kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: @@ -511,7 +509,7 @@ class OrderedSet(MutableSet[T]): a dict. Note that, unlike in an OrderedDict, equality tests are not order-sensitive. """ - _d: Dict[T, None] + _d: dict[T, None] __slots__ = ("_d",) @@ -585,7 +583,7 @@ def dtype(self: Any) -> np.dtype: return self.array.dtype @property - def shape(self: Any) -> Tuple[int]: + def shape(self: Any) -> tuple[int]: return self.array.shape def __getitem__(self: Any, key): @@ -659,7 +657,7 @@ def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: return magic_number -def try_read_magic_number_from_path(pathlike, count=8) -> Optional[bytes]: +def try_read_magic_number_from_path(pathlike, count=8) -> bytes | None: if isinstance(pathlike, str) or hasattr(pathlike, "__fspath__"): path = os.fspath(pathlike) try: @@ -670,9 +668,7 @@ def try_read_magic_number_from_path(pathlike, count=8) -> Optional[bytes]: return None -def try_read_magic_number_from_file_or_path( - filename_or_obj, count=8 -) -> Optional[bytes]: +def try_read_magic_number_from_file_or_path(filename_or_obj, count=8) -> bytes | None: magic_number = try_read_magic_number_from_path(filename_or_obj, count) if magic_number is None: try: @@ -706,7 +702,7 @@ def hashable(v: Any) -> bool: return True -def decode_numpy_dict_values(attrs: Mapping[K, V]) -> Dict[K, V]: +def decode_numpy_dict_values(attrs: Mapping[K, V]) -> dict[K, V]: """Convert attribute values from numpy objects to native Python objects, for use in to_dict """ @@ -815,7 +811,7 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: def drop_dims_from_indexers( indexers: Mapping[Any, Any], - dims: Union[list, Mapping[Any, int]], + dims: list | Mapping[Any, int], missing_dims: str, ) -> Mapping[Hashable, Any]: """Depending on the setting of missing_dims, drop any dimensions from indexers that From 597a00c9a9c732f1d40a7f7681cd2fe45f885d74 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 5 Feb 2022 13:59:06 -0800 Subject: [PATCH 31/68] Remove old PR template (#6241) --- .github/PULL_REQUEST_TEMPLATE.md | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 37b8d357c87..00000000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,6 +0,0 @@ - - -- [ ] Closes #xxxx -- [ ] Tests added -- [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` -- [ ] New functions/methods are listed in `api.rst` From 73d0b535b735f6975b693e614fe799565c70df15 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 5 Feb 2022 14:57:33 -0800 Subject: [PATCH 32/68] Remove default issue & PR titles (#6242) I would vote to use labels rather than strings here -- it clusters the names (but no strong view, 
feel free to close if ppl disagree) --- .github/ISSUE_TEMPLATE/bugreport.yml | 10 ++++------ .github/ISSUE_TEMPLATE/newfeature.yml | 2 -- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bugreport.yml b/.github/ISSUE_TEMPLATE/bugreport.yml index 255c7de07d9..bb1febae2ff 100644 --- a/.github/ISSUE_TEMPLATE/bugreport.yml +++ b/.github/ISSUE_TEMPLATE/bugreport.yml @@ -1,8 +1,6 @@ name: Bug Report description: File a bug report to help us improve -title: '[Bug]: ' -labels: [bug, 'needs triage'] -assignees: [] +labels: [bug, "needs triage"] body: - type: textarea id: what-happened @@ -54,8 +52,8 @@ body: - type: textarea id: show-versions attributes: - label: Environment - description: | - Paste the output of `xr.show_versions()` here + label: Environment + description: | + Paste the output of `xr.show_versions()` here validations: required: true diff --git a/.github/ISSUE_TEMPLATE/newfeature.yml b/.github/ISSUE_TEMPLATE/newfeature.yml index ec94b0f4b89..77cb15b7d37 100644 --- a/.github/ISSUE_TEMPLATE/newfeature.yml +++ b/.github/ISSUE_TEMPLATE/newfeature.yml @@ -1,8 +1,6 @@ name: Feature Request description: Suggest an idea for xarray -title: '[FEATURE]: ' labels: [enhancement] -assignees: [] body: - type: textarea id: description From 1d2ed220da3c50846a3b5e69098e55aa768d75f1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 6 Feb 2022 06:26:07 -0800 Subject: [PATCH 33/68] Revert "Remove old PR template (#6241)" (#6246) This reverts commit 597a00c9a9c732f1d40a7f7681cd2fe45f885d74. --- .github/PULL_REQUEST_TEMPLATE.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000000..37b8d357c87 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,6 @@ + + +- [ ] Closes #xxxx +- [ ] Tests added +- [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` +- [ ] New functions/methods are listed in `api.rst` From 52a051a784249377698ca2eb50c800513a30e7ba Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 7 Feb 2022 10:05:24 +0100 Subject: [PATCH 34/68] test bottleneck master in upstream CI [test-upstream] (#6248) --- ci/install-upstream-wheels.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index 86c661be56b..5fde7045b7d 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -13,10 +13,10 @@ conda uninstall -y --force \ rasterio \ packaging \ pint \ + bottleneck \ sparse \ h5netcdf \ xarray - # bottleneck \ # re-enable again, see https://github.com/pydata/bottleneck/pull/378 # to limit the runtime of Upstream CI python -m pip install pytest-timeout python -m pip install \ @@ -43,8 +43,8 @@ python -m pip install \ git+https://github.com/mapbox/rasterio \ git+https://github.com/pypa/packaging \ git+https://github.com/hgrecco/pint \ + git+https://github.com/pydata/bottleneck \ git+https://github.com/pydata/sparse \ git+https://github.com/intake/filesystem_spec \ git+https://github.com/SciTools/nc-time-axis \ git+https://github.com/h5netcdf/h5netcdf - # git+https://github.com/pydata/bottleneck \ # re-enable again, see https://github.com/pydata/bottleneck/pull/378 From d47cf0c850cb70429373782b3c1e0329d14fd05a Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 7 
Feb 2022 10:40:05 +0100 Subject: [PATCH 35/68] quantile: rename interpolation arg to method (#6108) * quantile: rename interpolation arg to method * add whats new entry * Apply suggestions from code review * fix ArrayLike * type dim * cleanup * update docstrings * indentation and quotation marks * use Literal * update whats new * remove newline --- doc/whats-new.rst | 3 ++ xarray/core/dataarray.py | 57 ++++++++++++++++------- xarray/core/dataset.py | 85 +++++++++++++++++++++++----------- xarray/core/groupby.py | 57 +++++++++++++++++------ xarray/core/npcompat.py | 28 ++++++++++- xarray/core/variable.py | 77 ++++++++++++++++++++++++------ xarray/tests/test_dataarray.py | 34 +++++++++++++- xarray/tests/test_dataset.py | 32 ++++++++++++- xarray/tests/test_groupby.py | 44 ++++++++++++++++++ xarray/tests/test_variable.py | 44 ++++++++++++++++++ 10 files changed, 386 insertions(+), 75 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 93271cca3e4..a8cd952609c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,9 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) + to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`). + By `Mathias Hauser `_. Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 81aaf5a50e0..6fe865a9f64 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,6 +53,7 @@ from .indexes import Index, Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .utils import ( Default, @@ -3426,11 +3427,12 @@ def sortby( def quantile( self, - q: Any, - dim: Hashable | Sequence[Hashable] | None = None, - interpolation: str = "linear", + q: ArrayLike, + dim: str | Sequence[Hashable] | None = None, + method: QUANTILE_METHODS = "linear", keep_attrs: bool = None, skipna: bool = True, + interpolation: QUANTILE_METHODS = None, ) -> DataArray: """Compute the qth quantile of the data along the specified dimension. @@ -3442,18 +3444,34 @@ def quantile( Quantile to compute, which must be between 0 and 1 inclusive. dim : hashable or sequence of hashable, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - - linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - - lower: ``i``. - - higher: ``j``. - - nearest: ``i`` or ``j``, whichever is nearest. - - midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default "linear" (7.) 
option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. + keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -3505,14 +3523,21 @@ def quantile( Coordinates: * y (y) float64 1.0 1.5 2.0 2.5 * quantile (quantile) float64 0.0 0.5 1.0 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ ds = self._to_temp_dataset().quantile( q, dim=dim, keep_attrs=keep_attrs, - interpolation=interpolation, + method=method, skipna=skipna, + interpolation=interpolation, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 29e8de39f7a..83126f157a4 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -77,6 +77,7 @@ merge_data_and_coords, ) from .missing import get_clean_interp_index +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import is_duck_dask_array, sparse_array_type from .utils import ( @@ -6137,12 +6138,13 @@ def sortby(self, variables, ascending=True): def quantile( self, - q, - dim=None, - interpolation="linear", - numeric_only=False, - keep_attrs=None, - skipna=True, + q: ArrayLike, + dim: str | Iterable[Hashable] | None = None, + method: QUANTILE_METHODS = "linear", + numeric_only: bool = False, + keep_attrs: bool = None, + skipna: bool = True, + interpolation: QUANTILE_METHODS = None, ): """Compute the qth quantile of the data along the specified dimension. @@ -6155,18 +6157,34 @@ def quantile( Quantile to compute, which must be between 0 and 1 inclusive. dim : str or sequence of str, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default "linear" (7.) option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for a description. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. 
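[Editor's aside — not part of the patch above; a minimal usage sketch of the renamed keyword, using a small made-up DataArray.] After this change the interpolation scheme is selected with ``method``, while the old ``interpolation`` spelling still works but emits a ``FutureWarning``::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(8.0), dims="x")

    # new spelling introduced by this patch
    da.quantile([0.25, 0.5, 0.75], dim="x", method="lower")

    # deprecated spelling: still accepted, but warns
    da.quantile(0.5, dim="x", interpolation="lower")
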
+ keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -6225,17 +6243,37 @@ def quantile( * quantile (quantile) float64 0.0 0.5 1.0 Data variables: a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ + # interpolation renamed to method in version 0.21.0 + # check here and in variable to avoid repeated warnings + if interpolation is not None: + warnings.warn( + "The `interpolation` argument to quantile was renamed to `method`.", + FutureWarning, + ) + + if method != "linear": + raise TypeError("Cannot pass interpolation and method keywords!") + + method = interpolation + + dims: set[Hashable] if isinstance(dim, str): dims = {dim} - elif dim in [None, ...]: + elif dim is None or dim is ...: dims = set(self.dims) else: dims = set(dim) _assert_empty( - [d for d in dims if d not in self.dims], + tuple(d for d in dims if d not in self.dims), "Dataset does not contain the dimensions: %s", ) @@ -6251,15 +6289,10 @@ def quantile( or np.issubdtype(var.dtype, np.number) or var.dtype == np.bool_ ): - if len(reduce_dims) == var.ndim: - # prefer to aggregate over axis=None rather than - # axis=(0, 1) if they will be equivalent, because - # the former is often more efficient - reduce_dims = None variables[name] = var.quantile( q, dim=reduce_dims, - interpolation=interpolation, + method=method, keep_attrs=keep_attrs, skipna=skipna, ) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e9f485a2393..d7aa6749592 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -549,7 +549,13 @@ def fillna(self, value): return ops.fillna(self, value) def quantile( - self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + self, + q, + dim=None, + method="linear", + keep_attrs=None, + skipna=True, + interpolation=None, ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -562,18 +568,34 @@ def quantile( dim : ..., str or sequence of str, optional Dimension(s) over which to apply quantile. Defaults to the grouped dimension. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default "linear" (7.) option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for details. 
Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. + skipna : bool, optional Whether to skip missing values when aggregating. @@ -639,6 +661,12 @@ def quantile( * y (y) int64 1 2 Data variables: a (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ if dim is None: dim = self._group_dim @@ -648,9 +676,10 @@ def quantile( shortcut=False, q=q, dim=dim, - interpolation=interpolation, + method=method, keep_attrs=keep_attrs, skipna=skipna, + interpolation=interpolation, ) return out diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 1eaa2728e8a..b5b98052fe9 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -28,7 +28,7 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from typing import TYPE_CHECKING, Any, Sequence, TypeVar, Union +from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar, Union import numpy as np from packaging.version import Version @@ -169,3 +169,29 @@ def sliding_window_view( return as_strided( x, strides=out_strides, shape=out_shape, subok=subok, writeable=writeable ) + + +if Version(np.__version__) >= Version("1.22.0"): + QUANTILE_METHODS = Literal[ + "inverted_cdf", + "averaged_inverted_cdf", + "closest_observation", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "linear", + "median_unbiased", + "normal_unbiased", + "lower", + "higher", + "midpoint", + "nearest", + ] +else: + QUANTILE_METHODS = Literal[ # type: ignore[misc] + "linear", + "lower", + "higher", + "midpoint", + "nearest", + ] diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 08af2e694df..6db795ce26f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -10,6 +10,7 @@ import numpy as np import pandas as pd +from packaging.version import Version import xarray as xr # only for Dataset and DataArray @@ -24,6 +25,7 @@ VectorizedIndexer, as_indexable, ) +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import ( DuckArrayModule, @@ -1971,8 +1973,14 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): return self.broadcast_equals(other, equiv=equiv) def quantile( - self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True - ): + self, + q: ArrayLike, + dim: str | Sequence[Hashable] | None = None, + method: QUANTILE_METHODS = "linear", + keep_attrs: bool = None, + skipna: bool = True, + interpolation: QUANTILE_METHODS = None, + ) -> Variable: """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1984,18 +1992,34 @@ def quantile( inclusive. dim : str or sequence of str, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. 
- * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default "linear" (7.) option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. + keep_attrs : bool, optional If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -2014,10 +2038,27 @@ def quantile( -------- numpy.nanquantile, pandas.Series.quantile, Dataset.quantile DataArray.quantile + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ from .computation import apply_ufunc + if interpolation is not None: + warnings.warn( + "The `interpolation` argument to quantile was renamed to `method`.", + FutureWarning, + ) + + if method != "linear": + raise TypeError("Cannot pass interpolation and method keywords!") + + method = interpolation + _quantile_func = np.nanquantile if skipna else np.quantile if keep_attrs is None: @@ -2037,6 +2078,12 @@ def _wrapper(npa, **kwargs): return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) + + if Version(np.__version__) >= Version("1.22.0"): + kwargs = {"q": q, "axis": axis, "method": method} + else: + kwargs = {"q": q, "axis": axis, "interpolation": method} + result = apply_ufunc( _wrapper, self, @@ -2046,7 +2093,7 @@ def _wrapper(npa, **kwargs): output_dtypes=[np.float64], dask_gufunc_kwargs=dict(output_sizes={"quantile": len(q)}), dask="parallelized", - kwargs={"q": q, "axis": axis, "interpolation": interpolation}, + kwargs=kwargs, ) # for backward compatibility diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a74556f882f..b707ae2a063 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2520,7 +2520,7 @@ def test_reduce_out(self): @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim, skipna): + def test_quantile(self, q, axis, dim, skipna) -> None: actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) _percentile_func = np.nanpercentile if skipna else np.percentile expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) @@ -2532,6 +2532,38 @@ def test_quantile(self, q, axis, dim, skipna): assert actual.attrs == self.attrs + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_method(self, method) -> None: + q = [0.25, 0.5, 0.75] + actual = DataArray(self.va).quantile(q, method=method) + + if Version(np.__version__) >= Version("1.22.0"): + expected = np.nanquantile(self.dv.values, 
np.array(q), method=method) # type: ignore[call-arg] + else: + expected = np.nanquantile(self.dv.values, np.array(q), interpolation=method) # type: ignore[call-arg] + + np.testing.assert_allclose(actual.values, expected) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecated(self, method) -> None: + + da = DataArray(self.va) + q = [0.25, 0.5, 0.75] + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = da.quantile(q, interpolation=method) + + expected = da.quantile(q, method=method) + + np.testing.assert_allclose(actual.values, expected.values) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + da.quantile(q, method=method, interpolation=method) + def test_reduce_keep_attrs(self): # Test dropped attrs vm = self.va.mean() diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3418c2e55e6..fed886465ed 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4719,7 +4719,7 @@ def test_reduce_keepdims(self): @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q, skipna): + def test_quantile(self, q, skipna) -> None: ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: @@ -4740,7 +4740,7 @@ def test_quantile(self, q, skipna): assert all(d not in ds_quantile.dims for d in dim) @pytest.mark.parametrize("skipna", [True, False]) - def test_quantile_skipna(self, skipna): + def test_quantile_skipna(self, skipna) -> None: q = 0.1 dim = "time" ds = Dataset({"a": ([dim], np.arange(0, 11))}) @@ -4752,6 +4752,34 @@ def test_quantile_skipna(self, skipna): expected = Dataset({"a": value}, coords={"quantile": q}) assert_identical(result, expected) + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_method(self, method) -> None: + + ds = create_test_data(seed=123) + q = [0.25, 0.5, 0.75] + + result = ds.quantile(q, method=method) + + assert_identical(result.var1, ds.var1.quantile(q, method=method)) + assert_identical(result.var2, ds.var2.quantile(q, method=method)) + assert_identical(result.var3, ds.var3.quantile(q, method=method)) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecated(self, method) -> None: + + ds = create_test_data(seed=123) + q = [0.25, 0.5, 0.75] + + with warnings.catch_warnings(record=True) as w: + ds.quantile(q, interpolation=method) + + # ensure the warning is only raised once + assert len(w) == 1 + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + ds.quantile(q, method=method, interpolation=method) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index fffc0c3708a..1ec2a53c131 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1,3 +1,6 @@ +import warnings +from typing import Union + import numpy as np import pandas as pd import pytest @@ -273,6 +276,15 @@ def test_da_groupby_quantile() -> None: ) assert_identical(expected, actual) + # method keyword + array = xr.DataArray(data=[1, 2, 3, 4], coords={"x": [1, 1, 2, 2]}, dims="x") + + expected = xr.DataArray( + data=[1, 3], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) + actual = array.groupby("x").quantile(0.5, 
method="lower") + assert_identical(expected, actual) + def test_ds_groupby_quantile() -> None: ds = xr.Dataset( @@ -367,6 +379,38 @@ def test_ds_groupby_quantile() -> None: ) assert_identical(expected, actual) + ds = xr.Dataset(data_vars={"a": ("x", [1, 2, 3, 4])}, coords={"x": [1, 1, 2, 2]}) + + # method keyword + expected = xr.Dataset( + data_vars={"a": ("x", [1, 3])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5, method="lower") + assert_identical(expected, actual) + + +@pytest.mark.parametrize("as_dataset", [False, True]) +def test_groupby_quantile_interpolation_deprecated(as_dataset) -> None: + + array = xr.DataArray(data=[1, 2, 3, 4], coords={"x": [1, 1, 2, 2]}, dims="x") + + arr: Union[xr.DataArray, xr.Dataset] + arr = array.to_dataset(name="name") if as_dataset else array + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = arr.quantile(0.5, interpolation="lower") + + expected = arr.quantile(0.5, method="lower") + + assert_identical(actual, expected) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + arr.quantile(0.5, method="lower", interpolation="lower") + def test_da_groupby_assign_coords() -> None: actual = xr.DataArray( diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 4170267b89c..33fff62c304 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -7,6 +7,7 @@ import pandas as pd import pytest import pytz +from packaging.version import Version from xarray import Coordinate, DataArray, Dataset, IndexVariable, Variable, set_options from xarray.core import dtypes, duck_array_ops, indexing @@ -1720,6 +1721,49 @@ def test_quantile_dask(self, q, axis, dim): expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + @pytest.mark.parametrize( + "use_dask", [pytest.param(True, marks=requires_dask), False] + ) + def test_quantile_method(self, method, use_dask) -> None: + + v = Variable(["x", "y"], self.d) + if use_dask: + v = v.chunk({"x": 2}) + + q = np.array([0.25, 0.5, 0.75]) + actual = v.quantile(q, dim="y", method=method) + + if Version(np.__version__) >= Version("1.22"): + expected = np.nanquantile(self.d, q, axis=1, method=method) # type: ignore[call-arg] + else: + expected = np.nanquantile(self.d, q, axis=1, interpolation=method) # type: ignore[call-arg] + + if use_dask: + assert isinstance(actual.data, dask_array_type) + + np.testing.assert_allclose(actual.values, expected) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecation(self, method) -> None: + + v = Variable(["x", "y"], self.d) + q = np.array([0.25, 0.5, 0.75]) + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = v.quantile(q, dim="y", interpolation=method) + + expected = v.quantile(q, dim="y", method=method) + + np.testing.assert_allclose(actual.values, expected.values) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + v.quantile(q, dim="y", interpolation=method, method=method) + @requires_dask def test_quantile_chunked_dim_error(self): v = Variable(["x", "y"], self.d).chunk({"x": 2}) From e71de637fda27f145e242144b28c2a1ab072a9ba Mon Sep 17 00:00:00 2001 From: 
"Alan D. Snow" Date: Tue, 8 Feb 2022 22:28:55 -0600 Subject: [PATCH 36/68] REF: Make mypy manual stage with pre-commit (#6024) * REF: Make mypy nanual stage with pre-commit * Update .pre-commit-config.yaml Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- .pre-commit-config.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 474916cf1a1..5ac1d1e3c3d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,9 +48,11 @@ repos: rev: v0.931 hooks: - id: mypy - # `properies` & `asv_bench` are copied from setup.cfg. - # `_typed_ops.py` is added since otherwise mypy will complain (but notably only in pre-commit) - exclude: "properties|asv_bench|_typed_ops.py" + # Copied from setup.cfg + exclude: "properties|asv_bench" + # This is slow and so we take it out of the fast-path; requires passing + # `--hook-stage manual` to pre-commit + stages: [manual] additional_dependencies: [ # Type stubs types-python-dateutil, From 48290fa14accd3ac87768d3f73d69493b82b0be6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 8 Feb 2022 21:07:19 -0800 Subject: [PATCH 37/68] Replace skip-duplicate logic with GH concurrency logic (#6245) --- .github/workflows/publish-test-results.yaml | 26 ++++----------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/.github/workflows/publish-test-results.yaml b/.github/workflows/publish-test-results.yaml index ea429d360c5..ba77c1fec3c 100644 --- a/.github/workflows/publish-test-results.yaml +++ b/.github/workflows/publish-test-results.yaml @@ -8,31 +8,15 @@ on: types: - completed -jobs: - - skip-duplicate-jobs: - runs-on: ubuntu-latest - if: | - github.repository == 'pydata/xarray' - && (github.event_name == 'push' || github.event_name == 'pull_request') - outputs: - should_skip: ${{ steps.skip_check.outputs.should_skip }} - steps: - - id: skip_check - uses: fkirc/skip-duplicate-actions@v3.4.1 - with: - # For workflows which are triggered concurrently with the same - # contents, attempt to execute them exactly once. 
- concurrent_skipping: 'same_content_newer' - paths_ignore: '["**/doc/**"]' +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +jobs: publish-test-results: name: Publish test results runs-on: ubuntu-latest - needs: skip-duplicate-jobs - if: | - needs.skip-duplicate-jobs.outputs.should_skip != 'true' - && github.event.workflow_run.conclusion != 'skipped' + if: github.event.workflow_run.conclusion != 'skipped' steps: - name: Download and extract artifacts From 39860f9bd3ed4e84a5d694adda10c82513ed519f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 9 Feb 2022 13:52:39 +0100 Subject: [PATCH 38/68] Run pyupgrade on core/weighted (#6257) * add annotations * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: Illviljan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- xarray/core/weighted.py | 102 ++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index f34f993ef31..83ce36bcb35 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,4 +1,6 @@ -from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Optional, Union, cast +from __future__ import annotations + +from typing import TYPE_CHECKING, Generic, Hashable, Iterable, cast import numpy as np @@ -74,7 +76,7 @@ class Weighted(Generic[T_Xarray]): __slots__ = ("obj", "weights") - def __init__(self, obj: T_Xarray, weights: "DataArray"): + def __init__(self, obj: T_Xarray, weights: DataArray): """ Create a Weighted object @@ -118,9 +120,9 @@ def _weight_check(w): _weight_check(weights.data) self.obj: T_Xarray = obj - self.weights: "DataArray" = weights + self.weights: DataArray = weights - def _check_dim(self, dim: Optional[Union[Hashable, Iterable[Hashable]]]): + def _check_dim(self, dim: Hashable | Iterable[Hashable] | None): """raise an error if any dimension is missing""" if isinstance(dim, str) or not isinstance(dim, Iterable): @@ -135,11 +137,11 @@ def _check_dim(self, dim: Optional[Union[Hashable, Iterable[Hashable]]]): @staticmethod def _reduce( - da: "DataArray", - weights: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + weights: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """reduce using dot; equivalent to (da * weights).sum(dim, skipna) for internal use only @@ -158,8 +160,8 @@ def _reduce( return dot(da, weights, dims=dim) def _sum_of_weights( - self, da: "DataArray", dim: Optional[Union[Hashable, Iterable[Hashable]]] = None - ) -> "DataArray": + self, da: DataArray, dim: Hashable | Iterable[Hashable] | None = None + ) -> DataArray: """Calculate the sum of weights, accounting for missing values""" # we need to mask data values that are nan; else the weights are wrong @@ -181,10 +183,10 @@ def _sum_of_weights( def _sum_of_squares( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``sum_of_squares`` along some 
dimension(s).""" demeaned = da - da.weighted(self.weights).mean(dim=dim) @@ -193,20 +195,20 @@ def _sum_of_squares( def _weighted_sum( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``sum`` along some dimension(s).""" return self._reduce(da, self.weights, dim=dim, skipna=skipna) def _weighted_mean( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``mean`` along some dimension(s).""" weighted_sum = self._weighted_sum(da, dim=dim, skipna=skipna) @@ -217,10 +219,10 @@ def _weighted_mean( def _weighted_var( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``var`` along some dimension(s).""" sum_of_squares = self._sum_of_squares(da, dim=dim, skipna=skipna) @@ -231,10 +233,10 @@ def _weighted_var( def _weighted_std( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``std`` along some dimension(s).""" return cast("DataArray", np.sqrt(self._weighted_var(da, dim, skipna))) @@ -245,8 +247,8 @@ def _implementation(self, func, dim, **kwargs): def sum_of_weights( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -255,9 +257,9 @@ def sum_of_weights( def sum_of_squares( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -266,9 +268,9 @@ def sum_of_squares( def sum( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -277,9 +279,9 @@ def sum( def mean( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -288,9 +290,9 @@ def mean( def var( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ 
-299,9 +301,9 @@ def var( def std( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -317,7 +319,7 @@ def __repr__(self): class DataArrayWeighted(Weighted["DataArray"]): - def _implementation(self, func, dim, **kwargs) -> "DataArray": + def _implementation(self, func, dim, **kwargs) -> DataArray: self._check_dim(dim) @@ -327,7 +329,7 @@ def _implementation(self, func, dim, **kwargs) -> "DataArray": class DatasetWeighted(Weighted["Dataset"]): - def _implementation(self, func, dim, **kwargs) -> "Dataset": + def _implementation(self, func, dim, **kwargs) -> Dataset: self._check_dim(dim) From 44a3e3bb76fc1266cbaef6e6fa84fa0c146af846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20K=C3=B6lling?= Date: Wed, 9 Feb 2022 16:12:31 +0100 Subject: [PATCH 39/68] removed check for last dask chunk size in to_zarr (#6258) * removed check for last dask chunk size in to_zarr When storing a dask-chunked dataset to zarr, the size of the last chunk in each dimension does not matter, as this single last chunk will be written to any number of zarr chunks, but none of the zarr chunks which are being written to will be accessed by any other dask chunk. * whats_new: reference to pull request * test_chunk_encoding_with_dask: relaxes tests to allow any aligned writes These tests were overly restrictive, any aligned write to zarr should work, independent of how many zarr chunks are touched by a single dask chunk, as long as not one zarr chunks is touched by multiple dask chunks. * dask/zarr tests: less missleading variable names --- doc/whats-new.rst | 3 +++ xarray/backends/zarr.py | 17 ------------- xarray/tests/test_backends.py | 46 ++++++++++++++++++++++++++--------- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a8cd952609c..2cadf6ff478 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,9 @@ Bug fixes ~~~~~~~~~ +- Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size + can now be stored using `to_zarr()` (:pull:`6258`) By `Tobias Kölling `_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index efb22bef1d4..b3f62bb798d 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -160,8 +160,6 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # threads if var_chunks and enc_chunks_tuple: for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks): - if len(dchunks) == 1: - continue for dchunk in dchunks[:-1]: if dchunk % zchunk: base_error = ( @@ -175,21 +173,6 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): + " Consider either rechunking using `chunk()`, deleting " "or modifying `encoding['chunks']`, or specify `safe_chunks=False`." ) - if dchunks[-1] > zchunk: - base_error = ( - "Final chunk of Zarr array must be the same size or " - "smaller than the first. " - f"Specified Zarr chunk encoding['chunks']={enc_chunks_tuple}, " - f"for variable named {name!r} " - f"but {dchunks} in the variable's Dask chunks {var_chunks} are " - "incompatible with this encoding. 
" - ) - if safe_chunks: - raise NotImplementedError( - base_error - + " Consider either rechunking using `chunk()`, deleting " - "or modifying `encoding['chunks']`, or specify `safe_chunks=False`." - ) return enc_chunks_tuple raise AssertionError("We should never get here. Function logic must be wrong.") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 23c48369989..321759f3ef6 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1895,20 +1895,24 @@ def test_chunk_encoding_with_dask(self): # don't actually check equality because the data could be corrupted pass - badenc.var1.encoding["chunks"] = (2,) - with pytest.raises(NotImplementedError, match=r"Specified Zarr chunk encoding"): - with self.roundtrip(badenc) as actual: - pass + # if dask chunks (4) are an integer multiple of zarr chunks (2) it should not fail... + goodenc = ds.chunk({"x": 4}) + goodenc.var1.encoding["chunks"] = (2,) + with self.roundtrip(goodenc) as actual: + pass - badenc = badenc.chunk({"x": (3, 3, 6)}) - badenc.var1.encoding["chunks"] = (3,) - with pytest.raises( - NotImplementedError, match=r"incompatible with this encoding" - ): - with self.roundtrip(badenc) as actual: - pass + # if initial dask chunks are aligned, size of last dask chunk doesn't matter + goodenc = ds.chunk({"x": (3, 3, 6)}) + goodenc.var1.encoding["chunks"] = (3,) + with self.roundtrip(goodenc) as actual: + pass - # ... except if the last chunk is smaller than the first + goodenc = ds.chunk({"x": (3, 6, 3)}) + goodenc.var1.encoding["chunks"] = (3,) + with self.roundtrip(goodenc) as actual: + pass + + # ... also if the last chunk is irregular ds_chunk_irreg = ds.chunk({"x": (5, 5, 2)}) with self.roundtrip(ds_chunk_irreg) as actual: assert (5,) == actual["var1"].encoding["chunks"] @@ -1917,6 +1921,15 @@ def test_chunk_encoding_with_dask(self): with self.roundtrip(original) as actual: assert_identical(original, actual) + # but itermediate unaligned chunks are bad + badenc = ds.chunk({"x": (3, 5, 3, 1)}) + badenc.var1.encoding["chunks"] = (3,) + with pytest.raises( + NotImplementedError, match=r"would overlap multiple dask chunks" + ): + with self.roundtrip(badenc) as actual: + pass + # - encoding specified - # specify compatible encodings for chunk_enc in 4, (4,): @@ -2374,6 +2387,15 @@ def test_chunk_encoding_with_partial_dask_chunks(self): ) as ds1: assert_equal(ds1, original) + @requires_dask + def test_chunk_encoding_with_larger_dask_chunks(self): + original = xr.Dataset({"a": ("x", [1, 2, 3, 4])}).chunk({"x": 2}) + + with self.roundtrip( + original, save_kwargs={"encoding": {"a": {"chunks": [1]}}} + ) as ds1: + assert_equal(ds1, original) + @requires_cftime def test_open_zarr_use_cftime(self): ds = create_test_data() From d479009d79374dc4a56c9f4346b1af38f5ac182c Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 10 Feb 2022 11:44:51 -0800 Subject: [PATCH 40/68] [docs] update urls throughout documentation (#6262) * update urls throughout documentation * more url updates * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update doc/internals/how-to-add-new-backend.rst * Apply suggestions from code review Co-authored-by: Anderson Banihirwe Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Anderson Banihirwe --- CONTRIBUTING.md | 2 +- README.rst | 6 ++-- asv_bench/asv.conf.json | 2 +- ci/install-upstream-wheels.sh | 2 +- design_notes/flexible_indexes_notes.md | 2 +- 
doc/README.rst | 2 +- doc/conf.py | 6 ++-- doc/contributing.rst | 18 +++++------ doc/ecosystem.rst | 15 ++++----- doc/gallery.rst | 2 +- doc/gallery/plot_rasterio.py | 2 +- doc/gallery/plot_rasterio_rgb.py | 2 +- doc/getting-started-guide/faq.rst | 14 ++++----- doc/getting-started-guide/installing.rst | 28 ++++++++--------- doc/getting-started-guide/quick-overview.rst | 2 +- doc/index.rst | 12 ++++---- doc/internals/how-to-add-new-backend.rst | 8 ++--- doc/internals/zarr-encoding-spec.rst | 2 +- doc/roadmap.rst | 2 +- doc/tutorials-and-videos.rst | 4 +-- doc/user-guide/computation.rst | 10 +++--- doc/user-guide/dask.rst | 8 ++--- doc/user-guide/data-structures.rst | 8 ++--- doc/user-guide/groupby.rst | 6 ++-- doc/user-guide/indexing.rst | 10 +++--- doc/user-guide/io.rst | 32 ++++++++++---------- doc/user-guide/pandas.rst | 8 ++--- doc/user-guide/plotting.rst | 18 +++++------ doc/user-guide/reshaping.rst | 2 +- doc/user-guide/time-series.rst | 10 +++--- doc/user-guide/weather-climate.rst | 2 +- doc/whats-new.rst | 20 ++++++------ setup.cfg | 6 ++-- xarray/backends/api.py | 4 +-- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/plugins.py | 12 ++++---- xarray/backends/rasterio_.py | 2 +- xarray/core/computation.py | 4 +-- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 4 +-- xarray/core/dtypes.py | 2 +- xarray/core/indexing.py | 4 +-- xarray/core/nputils.py | 2 +- xarray/tests/test_backends.py | 6 ++-- xarray/tests/test_cupy.py | 2 +- xarray/tutorial.py | 6 ++-- 46 files changed, 162 insertions(+), 163 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7a909aefd08..dd9931f907b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1 +1 @@ -Xarray's contributor guidelines [can be found in our online documentation](http://xarray.pydata.org/en/stable/contributing.html) +Xarray's contributor guidelines [can be found in our online documentation](http://docs.xarray.dev/en/stable/contributing.html) diff --git a/README.rst b/README.rst index f58b0002b62..7a4ad4e1f9f 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ xarray: N-D labeled arrays and datasets .. image:: https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg :target: https://codecov.io/gh/pydata/xarray .. image:: https://readthedocs.org/projects/xray/badge/?version=latest - :target: https://xarray.pydata.org/ + :target: https://docs.xarray.dev/ .. image:: https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat :target: https://pandas.pydata.org/speed/xarray/ .. image:: https://img.shields.io/pypi/v/xarray.svg @@ -69,12 +69,12 @@ powerful and concise interface. For example: Documentation ------------- -Learn more about xarray in its official documentation at https://xarray.pydata.org/ +Learn more about xarray in its official documentation at https://docs.xarray.dev/ Contributing ------------ -You can find information about contributing to xarray at our `Contributing page `_. +You can find information about contributing to xarray at our `Contributing page `_. 
Get in touch ------------ diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 7e0b11b815a..3e4137cf807 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -7,7 +7,7 @@ "project": "xarray", // The project's homepage - "project_url": "http://xarray.pydata.org/", + "project_url": "http://docs.xarray.dev/", // The URL or local path of the source code repository for the // project being benchmarked diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index 5fde7045b7d..96a39ccd20b 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -40,7 +40,7 @@ python -m pip install \ git+https://github.com/dask/distributed \ git+https://github.com/zarr-developers/zarr \ git+https://github.com/Unidata/cftime \ - git+https://github.com/mapbox/rasterio \ + git+https://github.com/rasterio/rasterio \ git+https://github.com/pypa/packaging \ git+https://github.com/hgrecco/pint \ git+https://github.com/pydata/bottleneck \ diff --git a/design_notes/flexible_indexes_notes.md b/design_notes/flexible_indexes_notes.md index c7eb718720c..b36ce3e46ed 100644 --- a/design_notes/flexible_indexes_notes.md +++ b/design_notes/flexible_indexes_notes.md @@ -133,7 +133,7 @@ A possible, more explicit solution to reuse a `pandas.MultiIndex` in a DataArray New indexes may also be built from existing sets of coordinates or variables in a Dataset/DataArray using the `.set_index()` method. -The [current signature](http://xarray.pydata.org/en/stable/generated/xarray.DataArray.set_index.html#xarray.DataArray.set_index) of `.set_index()` is tailored to `pandas.MultiIndex` and tied to the concept of a dimension-index. It is therefore hardly reusable as-is in the context of flexible indexes proposed here. +The [current signature](http://docs.xarray.dev/en/stable/generated/xarray.DataArray.set_index.html#xarray.DataArray.set_index) of `.set_index()` is tailored to `pandas.MultiIndex` and tied to the concept of a dimension-index. It is therefore hardly reusable as-is in the context of flexible indexes proposed here. The new signature may look like one of these: diff --git a/doc/README.rst b/doc/README.rst index 0579f85d85f..c1b6c63a4c0 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -3,4 +3,4 @@ xarray ------ -You can find information about building the docs at our `Contributing page `_. +You can find information about building the docs at our `Contributing page `_. diff --git a/doc/conf.py b/doc/conf.py index 93174c6aaec..5c4c0a52d43 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -260,12 +260,12 @@ # configuration for sphinxext.opengraph -ogp_site_url = "https://xarray.pydata.org/en/latest/" -ogp_image = "https://xarray.pydata.org/en/stable/_static/dataset-diagram-logo.png" +ogp_site_url = "https://docs.xarray.dev/en/latest/" +ogp_image = "https://docs.xarray.dev/en/stable/_static/dataset-diagram-logo.png" ogp_custom_meta_tags = [ '', '', - '', + '', ] # Redirects for pages that were moved to new locations diff --git a/doc/contributing.rst b/doc/contributing.rst index f5653fcc65e..df279caa54f 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -95,14 +95,14 @@ version control to allow many people to work together on the project. Some great resources for learning Git: -* the `GitHub help pages `_. -* the `NumPy's documentation `_. -* Matthew Brett's `Pydagogue `_. +* the `GitHub help pages `_. +* the `NumPy's documentation `_. +* Matthew Brett's `Pydagogue `_. 
Getting started with Git ------------------------ -`GitHub has instructions `__ for installing git, +`GitHub has instructions `__ for installing git, setting up your SSH key, and configuring git. All these steps need to be completed before you can work seamlessly between your local repository and GitHub. @@ -455,7 +455,7 @@ it is worth getting in the habit of writing tests ahead of time so that this is Like many packages, *xarray* uses `pytest `_ and the convenient extensions in `numpy.testing -`_. +`_. Writing tests ~~~~~~~~~~~~~ @@ -855,15 +855,15 @@ GitHub. To delete it there do:: PR checklist ------------ -- **Properly comment and document your code.** See `"Documenting your code" `_. -- **Test that the documentation builds correctly** by typing ``make html`` in the ``doc`` directory. This is not strictly necessary, but this may be easier than waiting for CI to catch a mistake. See `"Contributing to the documentation" `_. +- **Properly comment and document your code.** See `"Documenting your code" `_. +- **Test that the documentation builds correctly** by typing ``make html`` in the ``doc`` directory. This is not strictly necessary, but this may be easier than waiting for CI to catch a mistake. See `"Contributing to the documentation" `_. - **Test your code**. - - Write new tests if needed. See `"Test-driven development/code writing" `_. + - Write new tests if needed. See `"Test-driven development/code writing" `_. - Test the code using `Pytest `_. Running all tests (type ``pytest`` in the root directory) takes a while, so feel free to only run the tests you think are needed based on your PR (example: ``pytest xarray/tests/test_dataarray.py``). CI will catch any failing tests. - By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a [test-upstream] tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a "[skip-ci]" tag to the first line of the commit message. -- **Properly format your code** and verify that it passes the formatting guidelines set by `Black `_ and `Flake8 `_. See `"Code formatting" `_. You can use `pre-commit `_ to run these automatically on each commit. +- **Properly format your code** and verify that it passes the formatting guidelines set by `Black `_ and `Flake8 `_. See `"Code formatting" `_. You can use `pre-commit `_ to run these automatically on each commit. - Run ``pre-commit run --all-files`` in the root directory. This may modify some files. Confirm and commit any formatting changes. diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst index a9cbf39b644..469f83d37c1 100644 --- a/doc/ecosystem.rst +++ b/doc/ecosystem.rst @@ -20,12 +20,12 @@ Geosciences - `infinite-diff `_: xarray-based finite-differencing, focused on gridded climate/meteorology data - `marc_analysis `_: Analysis package for CESM/MARC experiments and output. - `MetPy `_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data. -- `MPAS-Analysis `_: Analysis for simulations produced with Model for Prediction Across Scales (MPAS) components and the Accelerated Climate Model for Energy (ACME). -- `OGGM `_: Open Global Glacier Model +- `MPAS-Analysis `_: Analysis for simulations produced with Model for Prediction Across Scales (MPAS) components and the Accelerated Climate Model for Energy (ACME). 
+- `OGGM `_: Open Global Glacier Model - `Oocgcm `_: Analysis of large gridded geophysical datasets - `Open Data Cube `_: Analysis toolkit of continental scale Earth Observation data from satellites. - `Pangaea: `_: xarray extension for gridded land surface & weather model output). -- `Pangeo `_: A community effort for big data geoscience in the cloud. +- `Pangeo `_: A community effort for big data geoscience in the cloud. - `PyGDX `_: Python 3 package for accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom subclass. @@ -41,13 +41,13 @@ Geosciences - `wradlib `_: An Open Source Library for Weather Radar Data Processing. - `wrf-python `_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model. - `xarray-simlab `_: xarray extension for computer model simulations. -- `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.) -- `xarray-topo `_: xarray extension for topographic analysis and modelling. +- `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.) +- `xarray-topo `_: xarray extension for topographic analysis and modelling. - `xbpch `_: xarray interface for bpch files. - `xclim `_: A library for calculating climate science indices with unit handling built from xarray and dask. - `xESMF `_: Universal regridder for geospatial data. - `xgcm `_: Extends the xarray data model to understand finite volume grid cells (common in General Circulation Models) and provides interpolation and difference operations for such grids. -- `xmitgcm `_: a python package for reading `MITgcm `_ binary MDS files into xarray data structures. +- `xmitgcm `_: a python package for reading `MITgcm `_ binary MDS files into xarray data structures. - `xnemogcm `_: a package to read `NEMO `_ output files and add attributes to interface with xgcm. Machine Learning @@ -57,6 +57,7 @@ Machine Learning - `Elm `_: Parallel machine learning on xarray data structures - `sklearn-xarray (1) `_: Combines scikit-learn and xarray (1). - `sklearn-xarray (2) `_: Combines scikit-learn and xarray (2). +- `xbatcher `_: Batch Generation from Xarray Datasets. Other domains ~~~~~~~~~~~~~ @@ -90,7 +91,7 @@ Visualization Non-Python projects ~~~~~~~~~~~~~~~~~~~ -- `xframe `_: C++ data structures inspired by xarray. +- `xframe `_: C++ data structures inspired by xarray. - `AxisArrays `_ and `NamedArrays `_: similar data structures for Julia. diff --git a/doc/gallery.rst b/doc/gallery.rst index 9e5284cc2ee..36eb39d1a53 100644 --- a/doc/gallery.rst +++ b/doc/gallery.rst @@ -116,7 +116,7 @@ External Examples --- :img-top: https://github.com/avatars/u/60833341?s=200&v=4 ++++ - .. link-button:: http://gallery.pangeo.io/ + .. 
link-button:: https://gallery.pangeo.io/ :type: url :text: Xarray and dask on the cloud with Pangeo :classes: btn-outline-dark btn-block stretched-link diff --git a/doc/gallery/plot_rasterio.py b/doc/gallery/plot_rasterio.py index 8294e01975f..853923a38bd 100644 --- a/doc/gallery/plot_rasterio.py +++ b/doc/gallery/plot_rasterio.py @@ -23,7 +23,7 @@ import xarray as xr # Read the data -url = "https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif" +url = "https://github.com/rasterio/rasterio/raw/master/tests/data/RGB.byte.tif" da = xr.open_rasterio(url) # Compute the lon/lat coordinates with pyproj diff --git a/doc/gallery/plot_rasterio_rgb.py b/doc/gallery/plot_rasterio_rgb.py index 758d4cd3c37..912224ac132 100644 --- a/doc/gallery/plot_rasterio_rgb.py +++ b/doc/gallery/plot_rasterio_rgb.py @@ -18,7 +18,7 @@ import xarray as xr # Read the data -url = "https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif" +url = "https://github.com/rasterio/rasterio/raw/master/tests/data/RGB.byte.tif" da = xr.open_rasterio(url) # The data is in UTM projection. We have to set it manually until diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst index d6e1c812fb2..0eeb09c432c 100644 --- a/doc/getting-started-guide/faq.rst +++ b/doc/getting-started-guide/faq.rst @@ -136,7 +136,7 @@ With xarray, we draw a firm line between labels that the library understands example, we do not automatically interpret and enforce units or `CF conventions`_. (An exception is serialization to and from netCDF files.) -.. _CF conventions: http://cfconventions.org/latest.html +.. _CF conventions: https://cfconventions.org/latest.html An implication of this choice is that we do not propagate ``attrs`` through most operations unless explicitly flagged (some methods have a ``keep_attrs`` @@ -155,7 +155,7 @@ xarray, and have contributed a number of improvements and fixes upstream. Xarray does not yet support all of netCDF4-python's features, such as modifying files on-disk. -__ https://github.com/Unidata/netcdf4-python +__ https://unidata.github.io/netcdf4-python/ Iris_ (supported by the UK Met office) provides similar tools for in- memory manipulation of labeled arrays, aimed specifically at weather and @@ -166,13 +166,13 @@ different approaches to handling metadata: Iris strictly interprets integration with Cartopy_. .. _Iris: https://scitools-iris.readthedocs.io/en/stable/ -.. _Cartopy: http://scitools.org.uk/cartopy/docs/latest/ +.. _Cartopy: https://scitools.org.uk/cartopy/docs/latest/ `UV-CDAT`__ is another Python library that implements in-memory netCDF-like variables and `tools for working with climate data`__. -__ http://uvcdat.llnl.gov/ -__ http://drclimate.wordpress.com/2014/01/02/a-beginners-guide-to-scripting-with-uv-cdat/ +__ https://uvcdat.llnl.gov/ +__ https://drclimate.wordpress.com/2014/01/02/a-beginners-guide-to-scripting-with-uv-cdat/ We think the design decisions we have made for xarray (namely, basing it on pandas) make it a faster and more flexible data analysis tool. That said, Iris @@ -197,7 +197,7 @@ would certainly appreciate it. We recommend two citations. - Hoyer, S. & Hamman, J., (2017). xarray: N-D labeled Arrays and Datasets in Python. Journal of Open Research Software. 5(1), p.10. - DOI: http://doi.org/10.5334/jors.148 + DOI: https://doi.org/10.5334/jors.148 Here’s an example of a BibTeX entry:: @@ -210,7 +210,7 @@ would certainly appreciate it. We recommend two citations. 
year = {2017}, publisher = {Ubiquity Press}, doi = {10.5334/jors.148}, - url = {http://doi.org/10.5334/jors.148} + url = {https://doi.org/10.5334/jors.148} } 2. You may also want to cite a specific version of the xarray package. We diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index c14e7d36579..6177ba0aaac 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -27,21 +27,21 @@ For netCDF and IO - `netCDF4 `__: recommended if you want to use xarray for reading or writing netCDF files -- `scipy `__: used as a fallback for reading/writing netCDF3 -- `pydap `__: used as a fallback for accessing OPeNDAP -- `h5netcdf `__: an alternative library for +- `scipy `__: used as a fallback for reading/writing netCDF3 +- `pydap `__: used as a fallback for accessing OPeNDAP +- `h5netcdf `__: an alternative library for reading and writing netCDF4 files that does not use the netCDF-C libraries - `PyNIO `__: for reading GRIB and other geoscience specific file formats. Note that PyNIO is not available for Windows and that the PyNIO backend may be moved outside of xarray in the future. -- `zarr `__: for chunked, compressed, N-dimensional arrays. +- `zarr `__: for chunked, compressed, N-dimensional arrays. - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before year 1678 or after year 2262. - `PseudoNetCDF `__: recommended for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files (ffi1001) and many other. -- `rasterio `__: for reading GeoTiffs and +- `rasterio `__: for reading GeoTiffs and other gridded raster datasets. - `iris `__: for conversion to and from iris' Cube objects @@ -51,26 +51,26 @@ For netCDF and IO For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ -- `scipy `__: necessary to enable the interpolation features for +- `scipy `__: necessary to enable the interpolation features for xarray objects - `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor -- `numbagg `_: for exponential rolling +- `numbagg `_: for exponential rolling window operations For parallel computing ~~~~~~~~~~~~~~~~~~~~~~ -- `dask.array `__: required for :ref:`dask`. +- `dask.array `__: required for :ref:`dask`. For plotting ~~~~~~~~~~~~ -- `matplotlib `__: required for :ref:`plotting` -- `cartopy `__: recommended for :ref:`plot-maps` -- `seaborn `__: for better +- `matplotlib `__: required for :ref:`plotting` +- `cartopy `__: recommended for :ref:`plot-maps` +- `seaborn `__: for better color palettes -- `nc-time-axis `__: for plotting +- `nc-time-axis `__: for plotting cftime.datetime objects Alternative data containers @@ -115,11 +115,11 @@ with its recommended dependencies using the conda command line tool:: $ conda install -c conda-forge xarray dask netCDF4 bottleneck -.. _conda: http://conda.io/ +.. _conda: https://docs.conda.io If you require other :ref:`optional-dependencies` add them to the line above. -We recommend using the community maintained `conda-forge `__ channel, +We recommend using the community maintained `conda-forge `__ channel, as some of the dependencies are difficult to build. New releases may also appear in conda-forge before being updated in the default channel. 
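A quick way to confirm which of the optional dependencies above were actually picked up is ``xarray.show_versions()``; a minimal sketch (the output depends entirely on the local environment)::

    import xarray as xr

    # Prints xarray's version plus the detected versions of optional
    # dependencies (netCDF4, h5netcdf, zarr, dask, matplotlib, ...);
    # anything not installed is reported as None.
    xr.show_versions()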
diff --git a/doc/getting-started-guide/quick-overview.rst b/doc/getting-started-guide/quick-overview.rst index 5bb5bb88ad3..cd4b66d2f6f 100644 --- a/doc/getting-started-guide/quick-overview.rst +++ b/doc/getting-started-guide/quick-overview.rst @@ -69,7 +69,7 @@ Unlike positional indexing, label-based indexing frees us from having to know ho Attributes ---------- -While you're setting up your DataArray, it's often a good idea to set metadata attributes. A useful choice is to set ``data.attrs['long_name']`` and ``data.attrs['units']`` since xarray will use these, if present, to automatically label your plots. These special names were chosen following the `NetCDF Climate and Forecast (CF) Metadata Conventions `_. ``attrs`` is just a Python dictionary, so you can assign anything you wish. +While you're setting up your DataArray, it's often a good idea to set metadata attributes. A useful choice is to set ``data.attrs['long_name']`` and ``data.attrs['units']`` since xarray will use these, if present, to automatically label your plots. These special names were chosen following the `NetCDF Climate and Forecast (CF) Metadata Conventions `_. ``attrs`` is just a Python dictionary, so you can assign anything you wish. .. ipython:: python diff --git a/doc/index.rst b/doc/index.rst index cffa450b6e8..c549c33aa62 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,10 +17,10 @@ It is particularly tailored to working with netCDF_ files, which were the source of xarray's data model, and integrates tightly with dask_ for parallel computing. -.. _NumPy: http://www.numpy.org -.. _pandas: http://pandas.pydata.org -.. _dask: http://dask.org -.. _netCDF: http://www.unidata.ucar.edu/software/netcdf +.. _NumPy: https://www.numpy.org +.. _pandas: https://pandas.pydata.org +.. _dask: https://dask.org +.. _netCDF: https://www.unidata.ucar.edu/software/netcdf .. toctree:: @@ -98,7 +98,7 @@ Hoyer, Alex Kleeman and Eugene Brevdo and was released as open source in May 2014. The project was renamed from "xray" in January 2016. Xarray became a fiscally sponsored project of NumFOCUS_ in August 2018. -__ http://climate.com/ +__ https://climate.com/ .. _NumFOCUS: https://numfocus.org License @@ -106,4 +106,4 @@ License Xarray is available under the open source `Apache License`__. -__ http://www.apache.org/licenses/LICENSE-2.0.html +__ https://www.apache.org/licenses/LICENSE-2.0.html diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index 22216997273..ceb59c8a3bd 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -311,9 +311,7 @@ The BackendArray subclass shall implement the following method and attributes: - the ``shape`` attribute - the ``dtype`` attribute. - -Xarray supports different type of -`indexing `__, that can be +Xarray supports different type of :doc:`/user-guide/indexing`, that can be grouped in three types of indexes :py:class:`~xarray.core.indexing.BasicIndexer`, :py:class:`~xarray.core.indexing.OuterIndexer` and @@ -372,7 +370,7 @@ input the ``key``, the array ``shape`` and the following parameters: For more details see :py:class:`~xarray.core.indexing.IndexingSupport` and :ref:`RST indexing`. -In order to support `Dask `__ distributed and +In order to support `Dask Distributed `__ and :py:mod:`multiprocessing`, ``BackendArray`` subclass should be serializable either with :ref:`io.pickle` or `cloudpickle `__. @@ -436,7 +434,7 @@ currently available in :py:mod:`~xarray.backends` module. 
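To make the ``BackendArray`` requirements above concrete, here is a minimal sketch of a lazily-indexed array that natively supports only basic indexing and lets ``explicit_indexing_adapter`` decompose outer and vectorized indexers; the class name and the raw-read helper are hypothetical, and a real backend would read from a file instead of fabricating zeros::

    import numpy as np

    from xarray.backends import BackendArray
    from xarray.core import indexing


    class MyBackendArray(BackendArray):
        """Hypothetical lazily-indexed array for a file-based backend."""

        def __init__(self, shape, dtype):
            self.shape = shape
            self.dtype = dtype

        def __getitem__(self, key):
            # Decompose OuterIndexer / VectorizedIndexer keys into basic
            # integer/slice keys plus a NumPy post-processing step.
            return indexing.explicit_indexing_adapter(
                key,
                self.shape,
                indexing.IndexingSupport.BASIC,
                self._raw_indexing_method,
            )

        def _raw_indexing_method(self, key):
            # A real backend would read only the requested slab from disk;
            # zeros are used here purely for illustration.
            return np.zeros(self.shape, dtype=self.dtype)[key]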
Backend preferred chunks ^^^^^^^^^^^^^^^^^^^^^^^^ -The backend is not directly involved in `Dask `__ +The backend is not directly involved in `Dask `__ chunking, since it is internally managed by Xarray. However, the backend can define the preferred chunk size inside the variable’s encoding ``var.encoding["preferred_chunks"]``. The ``preferred_chunks`` may be useful diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 082d7984f59..f809ea337d5 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -5,7 +5,7 @@ Zarr Encoding Specification ============================ -In implementing support for the `Zarr `_ storage +In implementing support for the `Zarr `_ storage format, Xarray developers made some *ad hoc* choices about how to store NetCDF data in Zarr. Future versions of the Zarr spec will likely include a more formal convention diff --git a/doc/roadmap.rst b/doc/roadmap.rst index b6ccb8d73db..c59d56fdd6d 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -20,7 +20,7 @@ Why has xarray been successful? In our opinion: - The dominant use-case for xarray is for analysis of gridded dataset in the geosciences, e.g., as part of the - `Pangeo `__ project. + `Pangeo `__ project. - Xarray is also used more broadly in the physical sciences, where we've found the needs for analyzing multidimensional datasets are remarkably consistent (e.g., see diff --git a/doc/tutorials-and-videos.rst b/doc/tutorials-and-videos.rst index 0a266c4f4a7..6a9602bcfa6 100644 --- a/doc/tutorials-and-videos.rst +++ b/doc/tutorials-and-videos.rst @@ -62,8 +62,8 @@ Books, Chapters and Articles .. _Xarray's Tutorials: https://xarray-contrib.github.io/xarray-tutorial/ -.. _Journal of Open Research Software paper: http://doi.org/10.5334/jors.148 +.. _Journal of Open Research Software paper: https://doi.org/10.5334/jors.148 .. _UW eScience Institute's Geohackweek : https://geohackweek.github.io/nDarrays/ .. _tutorial: https://github.com/Unidata/unidata-users-workshop/blob/master/notebooks/xray-tutorial.ipynb .. _with answers: https://github.com/Unidata/unidata-users-workshop/blob/master/notebooks/xray-tutorial-with-answers.ipynb -.. _Nicolas Fauchereau's 2015 tutorial: http://nbviewer.iPython.org/github/nicolasfauchereau/metocean/blob/master/notebooks/xray.ipynb +.. _Nicolas Fauchereau's 2015 tutorial: https://nbviewer.iPython.org/github/nicolasfauchereau/metocean/blob/master/notebooks/xray.ipynb diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index cb6eadc8e63..d830076e37b 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -38,7 +38,7 @@ numpy) over all array values: You can also use any of numpy's or scipy's many `ufunc`__ functions directly on a DataArray: -__ http://docs.scipy.org/doc/numpy/reference/ufuncs.html +__ https://numpy.org/doc/stable/reference/ufuncs.html .. ipython:: python @@ -200,7 +200,7 @@ From version 0.17, xarray supports multidimensional rolling, Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects with 1d-rolling. -.. _bottleneck: https://github.com/pydata/bottleneck/ +.. 
_bottleneck: https://github.com/pydata/bottleneck We can also manually iterate through ``Rolling`` objects: @@ -216,7 +216,7 @@ While ``rolling`` provides a simple moving average, ``DataArray`` also supports an exponential moving average with :py:meth:`~xarray.DataArray.rolling_exp`. This is similar to pandas' ``ewm`` method. numbagg_ is required. -.. _numbagg: https://github.com/shoyer/numbagg +.. _numbagg: https://github.com/numbagg/numbagg .. code:: python @@ -744,7 +744,7 @@ However, adding support for labels on both :py:class:`~xarray.Dataset` and To make this easier, xarray supplies the :py:func:`~xarray.apply_ufunc` helper function, designed for wrapping functions that support broadcasting and vectorization on unlabeled arrays in the style of a NumPy -`universal function `_ ("ufunc" for short). +`universal function `_ ("ufunc" for short). ``apply_ufunc`` takes care of everything needed for an idiomatic xarray wrapper, including alignment, broadcasting, looping over ``Dataset`` variables (if needed), and merging of coordinates. In fact, many internal xarray @@ -761,7 +761,7 @@ any additional arguments: For using more complex operations that consider some array values collectively, it's important to understand the idea of "core dimensions" from NumPy's -`generalized ufuncs `_. Core dimensions are defined as dimensions +`generalized ufuncs `_. Core dimensions are defined as dimensions that should *not* be broadcast over. Usually, they correspond to the fundamental dimensions over which an operation is defined, e.g., the summed axis in ``np.sum``. A good clue that core dimensions are needed is the presence of an diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index 4998cc68828..4d8715d9c51 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -5,7 +5,7 @@ Parallel computing with Dask ============================ -Xarray integrates with `Dask `__ to support parallel +Xarray integrates with `Dask `__ to support parallel computations and streaming computation on datasets that don't fit into memory. Currently, Dask is an entirely optional feature for xarray. However, the benefits of using Dask are sufficiently strong that Dask may become a required @@ -16,7 +16,7 @@ For a full example of how to use xarray's Dask integration, read the may be found at the `Pangeo project's gallery `_ and at the `Dask examples website `_. -.. _blog post introducing xarray and Dask: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ +.. _blog post introducing xarray and Dask: https://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ What is a Dask array? --------------------- @@ -39,7 +39,7 @@ The actual computation is controlled by a multi-processing or thread pool, which allows Dask to take full advantage of multiple processors available on most modern computers. -For more details on Dask, read `its documentation `__. +For more details on Dask, read `its documentation `__. Note that xarray only makes use of ``dask.array`` and ``dask.delayed``. .. _dask.io: @@ -225,7 +225,7 @@ disk. .. note:: For more on the differences between :py:meth:`~xarray.Dataset.persist` and - :py:meth:`~xarray.Dataset.compute` see this `Stack Overflow answer `_ and the `Dask documentation `_. + :py:meth:`~xarray.Dataset.compute` see this `Stack Overflow answer `_ and the `Dask documentation `_. For performance you may wish to consider chunk sizes. The correct choice of chunk size depends both on your data and on the operations you want to perform. 
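As a small illustration of the chunk-size point above, chunk sizes can be set when opening a file or changed afterwards with ``.chunk``; the file name and sizes below are placeholders::

    import xarray as xr

    # Open lazily with dask-backed variables, ~90 elements per chunk along "time".
    ds = xr.open_dataset("example.nc", chunks={"time": 90})

    # Re-chunk later; everything stays lazy until compute/load/persist is called.
    ds = ds.chunk({"time": 180})
    result = ds.mean("time").compute()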
diff --git a/doc/user-guide/data-structures.rst b/doc/user-guide/data-structures.rst index 1322c51248d..e0fd4bd0d25 100644 --- a/doc/user-guide/data-structures.rst +++ b/doc/user-guide/data-structures.rst @@ -227,7 +227,7 @@ container of labeled arrays (:py:class:`~xarray.DataArray` objects) with aligned dimensions. It is designed as an in-memory representation of the data model from the `netCDF`__ file format. -__ http://www.unidata.ucar.edu/software/netcdf/ +__ https://www.unidata.ucar.edu/software/netcdf/ In addition to the dict-like interface of the dataset itself, which can be used to access any variable in a dataset, datasets have four key properties: @@ -247,7 +247,7 @@ distinction for indexing and computations. Coordinates indicate constant/fixed/independent quantities, unlike the varying/measured/dependent quantities that belong in data. -.. _CF conventions: http://cfconventions.org/ +.. _CF conventions: https://cfconventions.org/ Here is an example of how we might structure a dataset for a weather forecast: @@ -520,7 +520,7 @@ in xarray: "non-dimension coordinates" are called "auxiliary coordinate variables" (see :issue:`1295` for more details). -.. _CF terminology: http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#terminology +.. _CF terminology: https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#terminology Modifying coordinates @@ -628,4 +628,4 @@ it is recommended that you explicitly set the names of the levels. at which the forecast was made, rather than ``time`` which is the valid time for which the forecast applies. -__ http://en.wikipedia.org/wiki/Map_projection +__ https://en.wikipedia.org/wiki/Map_projection diff --git a/doc/user-guide/groupby.rst b/doc/user-guide/groupby.rst index 4c4f8d473ce..98f88a3d4ec 100644 --- a/doc/user-guide/groupby.rst +++ b/doc/user-guide/groupby.rst @@ -6,8 +6,8 @@ GroupBy: split-apply-combine Xarray supports `"group by"`__ operations with the same API as pandas to implement the `split-apply-combine`__ strategy: -__ http://pandas.pydata.org/pandas-docs/stable/groupby.html -__ http://www.jstatsoft.org/v40/i01/paper +__ https://pandas.pydata.org/pandas-docs/stable/groupby.html +__ https://www.jstatsoft.org/v40/i01/paper - Split your data into multiple independent groups. - Apply some function to each group. @@ -201,7 +201,7 @@ which is different from the logical grid dimensions (e.g. nx, ny). Such variables are valid under the `CF conventions`__. Xarray supports groupby operations over multidimensional coordinate variables: -__ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimensional_latitude_longitude_coordinate_variables +__ https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimensional_latitude_longitude_coordinate_variables .. ipython:: python diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst index 89f00466fa4..29b48bf7c47 100644 --- a/doc/user-guide/indexing.rst +++ b/doc/user-guide/indexing.rst @@ -97,7 +97,7 @@ including indexing with individual, slices and arrays of labels, as well as indexing with boolean arrays. Like pandas, label based indexing in xarray is *inclusive* of both the start and stop bounds. -__ http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-label +__ https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-label Setting values with label based indexing is also supported: @@ -145,7 +145,7 @@ Python :py:class:`slice` objects or 1-dimensional arrays. 
brackets, but unfortunately, Python `does yet not support`__ indexing with keyword arguments like ``da[space=0]`` -__ http://legacy.python.org/dev/peps/pep-0472/ +__ https://legacy.python.org/dev/peps/pep-0472/ .. _nearest neighbor lookups: @@ -373,7 +373,7 @@ indexing for xarray is based on our :ref:`broadcasting rules `. See :ref:`indexing.rules` for the complete specification. -.. _advanced indexing: https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.indexing.html +.. _advanced indexing: https://numpy.org/doc/stable/reference/arrays.indexing.html Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: @@ -503,7 +503,7 @@ This is because ``v[0] = v[0] - 1`` is called three times, rather than ``v[0] = v[0] - 1 - 1 - 1``. See `Assigning values to indexed arrays`__ for the details. -__ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-to-indexed-arrays +__ https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-indexed-arrays .. note:: @@ -751,7 +751,7 @@ Whether data is a copy or a view is more predictable in xarray than in pandas, s unlike pandas, xarray does not produce `SettingWithCopy warnings`_. However, you should still avoid assignment with chained indexing. -.. _SettingWithCopy warnings: http://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy +.. _SettingWithCopy warnings: https://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy .. _multi-level indexing: diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 16b8708231e..28eeeeda99b 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -33,14 +33,14 @@ NetCDF is supported on almost all platforms, and parsers exist for the vast majority of scientific programming languages. Recent versions of netCDF are based on the even more widely used HDF5 file-format. -__ http://www.unidata.ucar.edu/software/netcdf/ +__ https://www.unidata.ucar.edu/software/netcdf/ .. tip:: If you aren't familiar with this data format, the `netCDF FAQ`_ is a good place to start. -.. _netCDF FAQ: http://www.unidata.ucar.edu/software/netcdf/docs/faq.html#What-Is-netCDF +.. _netCDF FAQ: https://www.unidata.ucar.edu/software/netcdf/docs/faq.html#What-Is-netCDF Reading and writing netCDF files with xarray requires scipy or the `netCDF4-Python`__ library to be installed (the latter is required to @@ -70,7 +70,7 @@ the ``format`` and ``engine`` arguments. .. tip:: - Using the `h5netcdf `_ package + Using the `h5netcdf `_ package by passing ``engine='h5netcdf'`` to :py:meth:`open_dataset` can sometimes be quicker than the default ``engine='netcdf4'`` that uses the `netCDF4 `_ package. @@ -255,7 +255,7 @@ See its docstring for more details. (``compat='override'``). -.. _dask: http://dask.pydata.org +.. _dask: http://dask.org .. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`open_mfdataset`. @@ -430,7 +430,7 @@ in the `documentation for createVariable`_ for netCDF4-Python. This only works for netCDF4 files and thus requires using ``format='netCDF4'`` and either ``engine='netcdf4'`` or ``engine='h5netcdf'``. -.. _documentation for createVariable: http://unidata.github.io/netcdf4-python/#netCDF4.Dataset.createVariable +.. 
_documentation for createVariable: https://unidata.github.io/netcdf4-python/#netCDF4.Dataset.createVariable Chunk based gzip compression can yield impressive space savings, especially for sparse data, but it comes with significant performance overhead. HDF5 @@ -529,7 +529,7 @@ Conversely, we can create a new ``DataArray`` object from a ``Cube`` using da_cube -.. _Iris: http://scitools.org.uk/iris +.. _Iris: https://scitools.org.uk/iris OPeNDAP @@ -538,13 +538,13 @@ OPeNDAP Xarray includes support for `OPeNDAP`__ (via the netCDF4 library or Pydap), which lets us access large datasets over HTTP. -__ http://www.opendap.org/ +__ https://www.opendap.org/ For example, we can open a connection to GBs of weather data produced by the `PRISM`__ project, and hosted by `IRI`__ at Columbia: -__ http://www.prism.oregonstate.edu/ -__ http://iri.columbia.edu/ +__ https://www.prism.oregonstate.edu/ +__ https://iri.columbia.edu/ .. ipython source code for this section we don't use this to avoid hitting the DAP server on every doc build. @@ -652,8 +652,8 @@ that require NASA's URS authentication:: ds = xr.open_dataset(store) -__ http://docs.python-requests.org -__ http://pydap.readthedocs.io/en/latest/client.html#authentication +__ https://docs.python-requests.org +__ https://www.pydap.org/en/latest/client.html#authentication .. _io.pickle: @@ -820,7 +820,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF .. _rasterio: https://rasterio.readthedocs.io/en/latest/ .. _rioxarray: https://corteva.github.io/rioxarray/stable/ -.. _test files: https://github.com/mapbox/rasterio/blob/master/tests/data/RGB.byte.tif +.. _test files: https://github.com/rasterio/rasterio/blob/master/tests/data/RGB.byte.tif .. _pyproj: https://github.com/pyproj4/pyproj .. _io.zarr: @@ -923,17 +923,17 @@ instance and pass this, as follows: (or use the utility function ``fsspec.get_mapper()``). .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ -.. _Zarr: http://zarr.readthedocs.io/ +.. _Zarr: https://zarr.readthedocs.io/ .. _Amazon S3: https://aws.amazon.com/s3/ .. _Google Cloud Storage: https://cloud.google.com/storage/ -.. _gcsfs: https://github.com/dask/gcsfs +.. _gcsfs: https://github.com/fsspec/gcsfs Zarr Compressors and Filters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are many different options for compression and filtering possible with zarr. These are described in the -`zarr documentation `_. +`zarr documentation `_. These options can be passed to the ``to_zarr`` method as variable encoding. For example: @@ -1156,7 +1156,7 @@ To use PseudoNetCDF to read such files, supply Add ``backend_kwargs={'format': ''}`` where `` options are listed on the PseudoNetCDF page. -.. _PseudoNetCDF: http://github.com/barronh/PseudoNetCDF +.. _PseudoNetCDF: https://github.com/barronh/PseudoNetCDF CSV and other formats supported by pandas diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst index acf1d16b6ee..a376b0a5cb8 100644 --- a/doc/user-guide/pandas.rst +++ b/doc/user-guide/pandas.rst @@ -11,8 +11,8 @@ ecosystem. For example, for plotting labeled data, we highly recommend using the visualization `built in to pandas itself`__ or provided by the pandas aware libraries such as `Seaborn`__. -__ http://pandas.pydata.org/pandas-docs/stable/visualization.html -__ http://seaborn.pydata.org/ +__ https://pandas.pydata.org/pandas-docs/stable/visualization.html +__ https://seaborn.pydata.org/ .. 
ipython:: python :suppress: @@ -32,7 +32,7 @@ Tabular data is easiest to work with when it meets the criteria for * Each column holds a different variable. * Each rows holds a different observation. -__ http://www.jstatsoft.org/v59/i10/ +__ https://www.jstatsoft.org/v59/i10/ In this "tidy data" format, we can represent any :py:class:`Dataset` and :py:class:`DataArray` in terms of :py:class:`~pandas.DataFrame` and @@ -241,5 +241,5 @@ While the xarray docs are relatively complete, a few items stand out for Panel u While xarray may take some getting used to, it's worth it! If anything is unclear, please post an issue on `GitHub `__ or -`StackOverflow `__, +`StackOverflow `__, and we'll endeavor to respond to the specific case or improve the general docs. diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index 1dce65b191c..d81ba30f12f 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -20,7 +20,7 @@ nicely into a pandas DataFrame then you're better off using one of the more developed tools there. Xarray plotting functionality is a thin wrapper around the popular -`matplotlib `_ library. +`matplotlib `_ library. Matplotlib syntax and function names were copied as much as possible, which makes for an easy transition between the two. Matplotlib must be installed before xarray can plot. @@ -32,11 +32,11 @@ needs to be installed. For more extensive plotting applications consider the following projects: -- `Seaborn `_: "provides +- `Seaborn `_: "provides a high-level interface for drawing attractive statistical graphics." Integrates well with pandas. -- `HoloViews `_ +- `HoloViews `_ and `GeoViews `_: "Composable, declarative data structures for building even complex visualizations easily." Includes native support for xarray objects. @@ -45,7 +45,7 @@ For more extensive plotting applications consider the following projects: dynamic plots (backed by ``Holoviews`` or ``Geoviews``) by adding a ``hvplot`` accessor to DataArrays. -- `Cartopy `_: Provides cartographic +- `Cartopy `_: Provides cartographic tools. Imports @@ -106,7 +106,7 @@ The simplest way to make a plot is to call the :py:func:`DataArray.plot()` metho @savefig plotting_1d_simple.png width=4in air1d.plot() -Xarray uses the coordinate name along with metadata ``attrs.long_name``, ``attrs.standard_name``, ``DataArray.name`` and ``attrs.units`` (if available) to label the axes. The names ``long_name``, ``standard_name`` and ``units`` are copied from the `CF-conventions spec `_. When choosing names, the order of precedence is ``long_name``, ``standard_name`` and finally ``DataArray.name``. The y-axis label in the above plot was constructed from the ``long_name`` and ``units`` attributes of ``air1d``. +Xarray uses the coordinate name along with metadata ``attrs.long_name``, ``attrs.standard_name``, ``DataArray.name`` and ``attrs.units`` (if available) to label the axes. The names ``long_name``, ``standard_name`` and ``units`` are copied from the `CF-conventions spec `_. When choosing names, the order of precedence is ``long_name``, ``standard_name`` and finally ``DataArray.name``. The y-axis label in the above plot was constructed from the ``long_name`` and ``units`` attributes of ``air1d``. .. ipython:: python @@ -123,7 +123,7 @@ matplotlib.pyplot.plot_ passing in the index and the array values as x and y, re So to make a line plot with blue triangles a matplotlib format string can be used: -.. _matplotlib.pyplot.plot: http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot +.. 
_matplotlib.pyplot.plot: https://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot .. ipython:: python :okwarning: @@ -563,7 +563,7 @@ You can also specify a list of discrete colors through the ``colors`` argument: @savefig plotting_custom_colors_levels.png width=4in air2d.plot(levels=[0, 12, 18, 30], colors=flatui) -Finally, if you have `Seaborn `_ +Finally, if you have `Seaborn `_ installed, you can also specify a seaborn color palette to the ``cmap`` argument. Note that ``levels`` *must* be specified with seaborn color palettes if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, @@ -687,7 +687,7 @@ The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.F that links a :py:class:`DataArray` to a matplotlib figure with a particular structure. This object can be used to control the behavior of the multiple plots. It borrows an API and code from `Seaborn's FacetGrid -`_. +`_. The structure is contained within the ``axes`` and ``name_dicts`` attributes, both 2d NumPy object arrays. @@ -1020,7 +1020,7 @@ You can however decide to infer the cell boundaries and use the yet. If you want to use these coordinates, you'll have to make the plots outside the xarray framework. -.. _cell boundaries: http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-boundaries +.. _cell boundaries: https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-boundaries One can also make line plots with multidimensional coordinates. In this case, ``hue`` must be a dimension name, not a coordinate name. diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index 86dc5fbe51a..edfaaa49427 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -151,7 +151,7 @@ Stacking different variables together These stacking and unstacking operations are particularly useful for reshaping xarray objects for use in machine learning packages, such as `scikit-learn -`_, that usually require two-dimensional numpy +`_, that usually require two-dimensional numpy arrays as inputs. For datasets with only one variable, we only need ``stack`` and ``unstack``, but combining multiple variables in a :py:class:`xarray.Dataset` is more complicated. If the variables in the dataset diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst index 1813c125eed..36a57e37475 100644 --- a/doc/user-guide/time-series.rst +++ b/doc/user-guide/time-series.rst @@ -46,7 +46,7 @@ When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` attribute like ``'days since 2000-01-01'``). -.. _CF conventions: http://cfconventions.org +.. _CF conventions: https://cfconventions.org .. note:: @@ -111,7 +111,7 @@ Datetime components Similar `to pandas`_, the components of datetime objects contained in a given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. -.. _to pandas: http://pandas.pydata.org/pandas-docs/stable/basics.html#basics-dt-accessors +.. _to pandas: https://pandas.pydata.org/pandas-docs/stable/basics.html#basics-dt-accessors .. ipython:: python @@ -128,7 +128,7 @@ Xarray also supports a notion of "virtual" or "derived" coordinates for "day", "hour", "minute", "second", "dayofyear", "week", "dayofweek", "weekday" and "quarter": -__ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components +__ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. 
ipython:: python @@ -150,7 +150,7 @@ You can use these shortcuts with both Datasets and DataArray coordinates. In addition, xarray supports rounding operations ``floor``, ``ceil``, and ``round``. These operations require that you supply a `rounding frequency as a string argument.`__ -__ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases +__ https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. ipython:: python @@ -200,7 +200,7 @@ For upsampling or downsampling temporal resolutions, xarray offers a offered by the pandas method of the same name. Resample uses essentially the same api as ``resample`` `in pandas`_. -.. _in pandas: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#up-and-downsampling +.. _in pandas: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#up-and-downsampling For example, we can downsample our dataset from hourly to 6-hourly: diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 893e7b50429..d11c7c3a4f9 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -12,7 +12,7 @@ Weather and climate data Xarray can leverage metadata that follows the `Climate and Forecast (CF) conventions`_ if present. Examples include automatic labelling of plots with descriptive names and units if proper metadata is present (see :ref:`plotting`) and support for non-standard calendars used in climate science through the ``cftime`` module (see :ref:`CFTimeIndex`). There are also a number of geosciences-focused projects that build on xarray (see :ref:`ecosystem`). -.. _Climate and Forecast (CF) conventions: http://cfconventions.org +.. _Climate and Forecast (CF) conventions: https://cfconventions.org .. _cf_variables: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2cadf6ff478..9502beec327 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -118,7 +118,7 @@ Bug fixes - No longer raise an error for an all-nan-but-one argument to :py:meth:`DataArray.interpolate_na` when using `method='nearest'` (:issue:`5994`, :pull:`6144`). By `Michael Delgado `_. -- `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). +- `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. - Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain cirumstances (:pull:`5526`). By `Chris Roat `_. @@ -1933,7 +1933,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Fix leap year condition in `monthly means example `_. +- Fix leap year condition in `monthly means example `_. By `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample`, explicitly stating that a @@ -2272,7 +2272,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Created a `PR checklist `_ +- Created a `PR checklist `_ as a quick reference for tasks before creating a new PR or pushing new commits. By `Gregory Gundersen `_. @@ -3337,7 +3337,7 @@ Backwards incompatible changes simple: convert your objects explicitly into NumPy arrays before calling the ufunc (e.g., with ``.values``). -.. _ufunc methods: https://docs.scipy.org/doc/numpy/reference/ufuncs.html#methods +.. _ufunc methods: https://numpy.org/doc/stable/reference/ufuncs.html#methods Enhancements ~~~~~~~~~~~~ @@ -4029,7 +4029,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- A new `gallery `_ +- A new `gallery `_ allows to add interactive examples to the documentation. By `Fabien Maussion `_. 
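The time-series guide patched above mentions downsampling an hourly dataset to 6-hourly with ``resample``; a minimal sketch with toy data::

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Arbitrary hourly series, two days long.
    time = pd.date_range("2000-01-01", periods=48, freq="H")
    da = xr.DataArray(np.arange(48), coords={"time": time}, dims="time")

    # Downsample to 6-hourly means.
    da_6h = da.resample(time="6H").mean()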
@@ -4781,8 +4781,8 @@ scientists who work with actual x-rays are interested in using this project in their work. Thanks for your understanding and patience in this transition. You can now find our documentation and code repository at new URLs: -- http://xarray.pydata.org -- http://github.com/pydata/xarray/ +- https://docs.xarray.dev +- https://github.com/pydata/xarray/ To ease the transition, we have simultaneously released v0.7.0 of both ``xray`` and ``xarray`` on the Python Package Index. These packages are @@ -5661,9 +5661,9 @@ is supporting out-of-core operations in xray using Dask_, a part of the Blaze_ project. For a preview of using Dask with weather data, read `this blog post`_ by Matthew Rocklin. See :issue:`328` for more details. -.. _Dask: http://dask.pydata.org -.. _Blaze: http://blaze.pydata.org -.. _this blog post: http://matthewrocklin.com/blog/work/2015/02/13/Towards-OOC-Slicing-and-Stacking/ +.. _Dask: https://dask.org +.. _Blaze: https://blaze.pydata.org +.. _this blog post: https://matthewrocklin.com/blog/work/2015/02/13/Towards-OOC-Slicing-and-Stacking v0.3.2 (23 December, 2014) -------------------------- diff --git a/setup.cfg b/setup.cfg index f9f8ae5c4dc..05b202810b4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,9 +51,9 @@ long_description = Learn more ---------- - - Documentation: ``_ - - Issue tracker: ``_ - - Source code: ``_ + - Documentation: ``_ + - Issue tracker: ``_ + - Source code: ``_ - SciPy2015 talk: ``_ url = https://github.com/pydata/xarray diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0ca82555c8f..548b98048ba 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -834,8 +834,8 @@ def open_mfdataset( References ---------- - .. [1] http://xarray.pydata.org/en/stable/dask.html - .. [2] http://xarray.pydata.org/en/stable/dask.html#chunking-and-performance + .. [1] https://docs.xarray.dev/en/stable/dask.html + .. [2] https://docs.xarray.dev/en/stable/dask.html#chunking-and-performance """ if isinstance(paths, str): if is_remote_uri(paths) and engine == "zarr": diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 735aa5fc3bc..70fc3a76266 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -280,7 +280,7 @@ def prepare_variable( raise NotImplementedError( "h5netcdf does not yet support setting a fill value for " "variable-length strings " - "(https://github.com/shoyer/h5netcdf/issues/37). " + "(https://github.com/h5netcdf/h5netcdf/issues/37). " f"Either remove '_FillValue' from encoding on variable {name!r} " "or set {'dtype': 'S1'} in encoding to use the fixed width " "NC_CHAR type." diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index a45ee78efd0..7444fbf11eb 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -126,23 +126,23 @@ def guess_engine(store_spec): f"backends {installed_engines}. Consider explicitly selecting one of the " "installed engines via the ``engine`` parameter, or installing " "additional IO dependencies, see:\n" - "http://xarray.pydata.org/en/stable/getting-started-guide/installing.html\n" - "http://xarray.pydata.org/en/stable/user-guide/io.html" + "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" + "https://docs.xarray.dev/en/stable/user-guide/io.html" ) else: error_msg = ( "xarray is unable to open this file because it has no currently " "installed IO backends. 
Xarray's read/write support requires " "installing optional IO dependencies, see:\n" - "http://xarray.pydata.org/en/stable/getting-started-guide/installing.html\n" - "http://xarray.pydata.org/en/stable/user-guide/io" + "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" + "https://docs.xarray.dev/en/stable/user-guide/io" ) else: error_msg = ( "found the following matches with the input file in xarray's IO " f"backends: {compatible_engines}. But their dependencies may not be installed, see:\n" - "http://xarray.pydata.org/en/stable/user-guide/io.html \n" - "http://xarray.pydata.org/en/stable/getting-started-guide/installing.html" + "https://docs.xarray.dev/en/stable/user-guide/io.html \n" + "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html" ) raise ValueError(error_msg) diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 9600827a807..7f3791ffca2 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -189,7 +189,7 @@ def open_rasterio( >>> from affine import Affine >>> da = xr.open_rasterio( - ... "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/RGB.byte.tif" + ... "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/RGB.byte.tif" ... ) >>> da diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 7273d25253d..88eefbdc441 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1044,8 +1044,8 @@ def apply_ufunc( References ---------- - .. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html - .. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html + .. [1] https://numpy.org/doc/stable/reference/ufuncs.html + .. [2] https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html """ from .dataarray import DataArray from .groupby import GroupBy diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6fe865a9f64..20e829d293e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3378,7 +3378,7 @@ def sortby( If multiple sorts along the same dimension is given, numpy's lexsort is performed along that dimension: - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lexsort.html + https://numpy.org/doc/stable/reference/generated/numpy.lexsort.html and the FIRST key in the sequence is used as the primary sort key, followed by the 2nd key, etc. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 83126f157a4..af59f5cd2f1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1892,7 +1892,7 @@ def to_netcdf( invalid_netcdf: bool, default: False Only valid along with ``engine="h5netcdf"``. If True, allow writing hdf5 files which are invalid netcdf as described in - https://github.com/shoyer/h5netcdf. + https://github.com/h5netcdf/h5netcdf. """ if encoding is None: encoding = {} @@ -6069,7 +6069,7 @@ def sortby(self, variables, ascending=True): If multiple sorts along the same dimension is given, numpy's lexsort is performed along that dimension: - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lexsort.html + https://numpy.org/doc/stable/reference/generated/numpy.lexsort.html and the FIRST key in the sequence is used as the primary sort key, followed by the 2nd key, etc. 
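To illustrate the multi-key behaviour described in the ``sortby`` docstrings above, a short sketch (names and values are arbitrary): ``"group"`` acts as the primary key and ``"value"`` breaks ties within each group, mirroring how the docstring describes the lexsort ordering::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"data": ("x", np.array([10, 20, 30, 40]))},
        coords={
            "group": ("x", ["b", "a", "b", "a"]),
            "value": ("x", [2, 2, 1, 1]),
        },
    )

    # Sort primarily by "group", then by "value" within each group.
    sorted_ds = ds.sortby(["group", "value"])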
diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 5f9349051b7..1e87e782fb2 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -34,7 +34,7 @@ def __eq__(self, other): # Pairs of types that, if both found, should be promoted to object dtype # instead of following NumPy's own type-promotion rules. These type promotion # rules match pandas instead. For reference, see the NumPy type hierarchy: -# https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html +# https://numpy.org/doc/stable/reference/arrays.scalars.html PROMOTE_TO_OBJECT = [ {np.number, np.character}, # numpy promotes to character {np.bool_, np.character}, # numpy promotes to character diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 581572cd0e1..17d026baa59 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -35,7 +35,7 @@ def expanded_indexer(key, ndim): key = (key,) new_key = [] # handling Ellipsis right is a little tricky, see: - # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing + # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: @@ -1146,7 +1146,7 @@ def _indexing_array_and_key(self, key): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see - # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). + # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). key = key.tuple + (Ellipsis,) else: raise TypeError(f"unexpected key type: {type(key)}") diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 3e0f550dd30..1feb97c5aa4 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -103,7 +103,7 @@ def _advanced_indexer_subspaces(key): # Nothing to reorder: dimensions on the indexing result are already # ordered like vindex. See NumPy's rule for "Combining advanced and # basic indexing": - # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#combining-advanced-and-basic-indexing + # https://numpy.org/doc/stable/reference/arrays.indexing.html#combining-advanced-and-basic-indexing return (), () non_slices = [k for k in key if not isinstance(k, slice)] diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 321759f3ef6..c0e340dd723 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1294,7 +1294,7 @@ def test_roundtrip_string_with_fill_value_vlen(self): # netCDF4-based backends don't support an explicit fillvalue # for variable length strings yet. 
# https://github.com/Unidata/netcdf4-python/issues/730 - # https://github.com/shoyer/h5netcdf/issues/37 + # https://github.com/h5netcdf/h5netcdf/issues/37 original = Dataset({"x": ("t", values, {}, {"_FillValue": "XXX"})}) with pytest.raises(NotImplementedError): with self.roundtrip(original) as actual: @@ -4733,7 +4733,7 @@ def test_rasterio_vrt_with_transform_and_size(self): # Test open_rasterio() support of WarpedVRT with transform, width and # height (issue #2864) - # https://github.com/mapbox/rasterio/1768 + # https://github.com/rasterio/rasterio/1768 rasterio = pytest.importorskip("rasterio", minversion="1.0.28") from affine import Affine from rasterio.warp import calculate_default_transform @@ -4763,7 +4763,7 @@ def test_rasterio_vrt_with_transform_and_size(self): def test_rasterio_vrt_with_src_crs(self): # Test open_rasterio() support of WarpedVRT with specified src_crs - # https://github.com/mapbox/rasterio/1768 + # https://github.com/rasterio/rasterio/1768 rasterio = pytest.importorskip("rasterio", minversion="1.0.28") # create geotiff with no CRS and specify it manually diff --git a/xarray/tests/test_cupy.py b/xarray/tests/test_cupy.py index e8f35e12ac6..79a540cdb38 100644 --- a/xarray/tests/test_cupy.py +++ b/xarray/tests/test_cupy.py @@ -11,7 +11,7 @@ def toy_weather_data(): """Construct the example DataSet from the Toy weather data example. - http://xarray.pydata.org/en/stable/examples/weather-data.html + https://docs.xarray.dev/en/stable/examples/weather-data.html Here we construct the DataSet exactly as shown in the example and then convert the numpy arrays to cupy. diff --git a/xarray/tutorial.py b/xarray/tutorial.py index d9ff3b1492d..fd8150bf8a6 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -33,8 +33,8 @@ def _construct_cache_dir(path): external_urls = {} # type: dict external_rasterio_urls = { - "RGB.byte": "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/RGB.byte.tif", - "shade": "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/shade.tif", + "RGB.byte": "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/RGB.byte.tif", + "shade": "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/shade.tif", } file_formats = { "air_temperature": 3, @@ -185,7 +185,7 @@ def open_rasterio( References ---------- - .. [1] https://github.com/mapbox/rasterio + .. 
[1] https://github.com/rasterio/rasterio """ try: import pooch From d994273b5d61bbdc2fde5050af5922deb66db274 Mon Sep 17 00:00:00 2001 From: Martin Bergemann Date: Thu, 10 Feb 2022 23:37:37 +0100 Subject: [PATCH 41/68] Fix pickling issue (#6249) * Add keyword argument for dtype in __new__ * Add tests for parallel reading of data with cftime axis * Add tests for pickling cftime index objects * Update whats-new.rst * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update suggestion in doc/whats-new.rst Co-authored-by: Spencer Clark * Add reuires_cftime docorator Co-authored-by: Spencer Clark * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Spencer Clark --- doc/whats-new.rst | 1 + xarray/coding/cftimeindex.py | 2 +- xarray/tests/test_cftimeindex.py | 10 ++++++++++ xarray/tests/test_distributed.py | 23 +++++++++++++++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9502beec327..02b341f963e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,6 +40,7 @@ Bug fixes - Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size can now be stored using `to_zarr()` (:pull:`6258`) By `Tobias Kölling `_. +- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`). By `Martin Bergemann `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index ac6904d4e31..8f9d19d7897 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -310,7 +310,7 @@ class CFTimeIndex(pd.Index): ) date_type = property(get_date_type) - def __new__(cls, data, name=None): + def __new__(cls, data, name=None, **kwargs): assert_all_valid_date_type(data) if name is None and hasattr(data, "name"): name = data.name diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 94f0cf4c2a5..c70fd53038b 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1,3 +1,4 @@ +import pickle from datetime import timedelta from textwrap import dedent @@ -1289,3 +1290,12 @@ def test_infer_freq(freq, calendar): indx = xr.cftime_range("2000-01-01", periods=3, freq=freq, calendar=calendar) out = xr.infer_freq(indx) assert out == freq + + +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_pickle_cftimeindex(calendar): + + idx = xr.cftime_range("2000-01-01", periods=3, freq="D", calendar=calendar) + idx_pkl = pickle.loads(pickle.dumps(idx)) + assert (idx == idx_pkl).all() diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index f70e1c7958e..a6ea792b5ac 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -1,5 +1,8 @@ """ isort:skip_file """ +import os import pickle +import numpy as np +import tempfile import pytest @@ -23,12 +26,15 @@ from . import ( assert_allclose, + assert_identical, has_h5netcdf, has_netCDF4, requires_rasterio, has_scipy, requires_zarr, requires_cfgrib, + requires_cftime, + requires_netCDF4, ) # this is to stop isort throwing errors. 
May have been easier to just use @@ -105,6 +111,23 @@ def test_dask_distributed_netcdf_roundtrip( assert_allclose(original, computed) +@requires_cftime +@requires_netCDF4 +def test_open_mfdataset_can_open_files_with_cftime_index(): + T = xr.cftime_range("20010101", "20010501", calendar="360_day") + Lon = np.arange(100) + data = np.random.random((T.size, Lon.size)) + da = xr.DataArray(data, coords={"time": T, "Lon": Lon}, name="test") + with cluster() as (s, [a, b]): + with Client(s["address"]): + with tempfile.TemporaryDirectory() as td: + data_file = os.path.join(td, "test.nc") + da.to_netcdf(data_file) + for parallel in (False, True): + with xr.open_mfdataset(data_file, parallel=parallel) as tf: + assert_identical(tf["test"], da) + + @pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS) def test_dask_distributed_read_netcdf_integration_test( loop, tmp_netcdf_filename, engine, nc_format From 472a16e5bcf5bcf375f23e215653631bd326b673 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 12 Feb 2022 14:50:31 +0100 Subject: [PATCH 42/68] Update .pre-commit-config.yaml (#6270) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ac1d1e3c3d..63a2871b496 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: hooks: - id: pyupgrade args: - - "--py37-plus" + - "--py38-plus" # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black rev: 22.1.0 From 8c5c230881d616af32c5a42cca261eb11fce3916 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Feb 2022 01:05:41 -0800 Subject: [PATCH 43/68] Bump actions/github-script from 5 to 6 (#6273) --- .github/workflows/upstream-dev-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index f1ce442c623..f6f97fd67e3 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -126,7 +126,7 @@ jobs: shopt -s globstar python .github/workflows/parse_logs.py logs/**/*-log - name: Report failures - uses: actions/github-script@v5 + uses: actions/github-script@v6 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | From 7790863435c612f3e1205292bd7f4efa847fd310 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Wed, 16 Feb 2022 00:05:04 +0100 Subject: [PATCH 44/68] Implement multiplication of cftime Tick offsets by floats (#6135) --- doc/whats-new.rst | 8 +++ xarray/coding/cftime_offsets.py | 61 +++++++++++++++--- xarray/coding/cftimeindex.py | 17 ++--- xarray/tests/test_cftime_offsets.py | 96 +++++++++++++++++++++++------ xarray/tests/test_cftimeindex.py | 52 +++++++++++++--- 5 files changed, 190 insertions(+), 44 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 02b341f963e..88453a641e0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -19,9 +19,15 @@ What's New v2022.02.0 (unreleased) ----------------------- + New Features ~~~~~~~~~~~~ +- Enabled multiplying tick offsets by floats. Allows ``float`` ``n`` in + :py:meth:`CFTimeIndex.shift` if ``shift_freq`` is between ``Day`` + and ``Microsecond``. (:issue:`6134`, :pull:`6135`). + By `Aaron Spring `_. 
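The pickling fix in patch 41 above restores parallel reading of multi-file datasets whose time coordinate decodes to ``cftime`` objects (for example, non-standard calendars); a brief sketch of the user-facing call, with a placeholder glob pattern::

    import xarray as xr

    # With CFTimeIndex pickling fixed, the dask code path used by
    # parallel=True works again for files with cftime-based time coordinates.
    ds = xr.open_mfdataset("simulation_*.nc", parallel=True)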
+ Breaking changes ~~~~~~~~~~~~~~~~ @@ -46,6 +52,7 @@ Documentation ~~~~~~~~~~~~~ + Internal Changes ~~~~~~~~~~~~~~~~ @@ -86,6 +93,7 @@ New Features - Enable the limit option for dask array in the following methods :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` (:issue:`6112`) By `Joseph Nowak `_. + Breaking changes ~~~~~~~~~~~~~~~~ - Rely on matplotlib's default datetime converters instead of pandas' (:issue:`6102`, :pull:`6109`). diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 30bfd882b5c..a4e2870650d 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -39,11 +39,12 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import annotations import re from datetime import datetime, timedelta from functools import partial -from typing import ClassVar, Optional +from typing import ClassVar import numpy as np import pandas as pd @@ -87,10 +88,10 @@ def get_date_type(calendar, use_cftime=True): class BaseCFTimeOffset: - _freq: ClassVar[Optional[str]] = None - _day_option: ClassVar[Optional[str]] = None + _freq: ClassVar[str | None] = None + _day_option: ClassVar[str | None] = None - def __init__(self, n=1): + def __init__(self, n: int = 1): if not isinstance(n, int): raise TypeError( "The provided multiple 'n' must be an integer. " @@ -122,6 +123,8 @@ def __sub__(self, other): return NotImplemented def __mul__(self, other): + if not isinstance(other, int): + return NotImplemented return type(self)(n=other * self.n) def __neg__(self): @@ -171,6 +174,40 @@ def _get_offset_day(self, other): return _get_day_of_month(other, self._day_option) +class Tick(BaseCFTimeOffset): + # analogous https://github.com/pandas-dev/pandas/blob/ccb25ab1d24c4fb9691270706a59c8d319750870/pandas/_libs/tslibs/offsets.pyx#L806 + + def _next_higher_resolution(self): + self_type = type(self) + if self_type not in [Day, Hour, Minute, Second, Millisecond]: + raise ValueError("Could not convert to integer offset at any resolution") + if type(self) is Day: + return Hour(self.n * 24) + if type(self) is Hour: + return Minute(self.n * 60) + if type(self) is Minute: + return Second(self.n * 60) + if type(self) is Second: + return Millisecond(self.n * 1000) + if type(self) is Millisecond: + return Microsecond(self.n * 1000) + + def __mul__(self, other): + if not isinstance(other, (int, float)): + return NotImplemented + if isinstance(other, float): + n = other * self.n + # If the new `n` is an integer, we can represent it using the + # same BaseCFTimeOffset subclass as self, otherwise we need to move up + # to a higher-resolution subclass + if np.isclose(n % 1, 0): + return type(self)(int(n)) + + new_self = self._next_higher_resolution() + return new_self * other + return type(self)(n=other * self.n) + + def _get_day_of_month(other, day_option): """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's onOffset policy, as described by the `day_option` argument. 
@@ -396,6 +433,8 @@ def __sub__(self, other): return NotImplemented def __mul__(self, other): + if isinstance(other, float): + return NotImplemented return type(self)(n=other * self.n, month=self.month) def rule_code(self): @@ -482,6 +521,8 @@ def __sub__(self, other): return NotImplemented def __mul__(self, other): + if isinstance(other, float): + return NotImplemented return type(self)(n=other * self.n, month=self.month) def rule_code(self): @@ -541,7 +582,7 @@ def rollback(self, date): return date - YearEnd(month=self.month) -class Day(BaseCFTimeOffset): +class Day(Tick): _freq = "D" def as_timedelta(self): @@ -551,7 +592,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Hour(BaseCFTimeOffset): +class Hour(Tick): _freq = "H" def as_timedelta(self): @@ -561,7 +602,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Minute(BaseCFTimeOffset): +class Minute(Tick): _freq = "T" def as_timedelta(self): @@ -571,7 +612,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Second(BaseCFTimeOffset): +class Second(Tick): _freq = "S" def as_timedelta(self): @@ -581,7 +622,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Millisecond(BaseCFTimeOffset): +class Millisecond(Tick): _freq = "L" def as_timedelta(self): @@ -591,7 +632,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Microsecond(BaseCFTimeOffset): +class Microsecond(Tick): _freq = "U" def as_timedelta(self): diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 8f9d19d7897..d522d7910d4 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -38,11 +38,11 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from __future__ import annotations import re import warnings from datetime import timedelta -from typing import Tuple, Type import numpy as np import pandas as pd @@ -66,7 +66,7 @@ REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END = 10 -OUT_OF_BOUNDS_TIMEDELTA_ERRORS: Tuple[Type[Exception], ...] +OUT_OF_BOUNDS_TIMEDELTA_ERRORS: tuple[type[Exception], ...] try: OUT_OF_BOUNDS_TIMEDELTA_ERRORS = (pd.errors.OutOfBoundsTimedelta, OverflowError) except AttributeError: @@ -511,7 +511,7 @@ def contains(self, key): """Needed for .loc based partial-string indexing""" return self.__contains__(key) - def shift(self, n, freq): + def shift(self, n: int | float, freq: str | timedelta): """Shift the CFTimeIndex a multiple of the given frequency. 
See the documentation for :py:func:`~xarray.cftime_range` for a @@ -519,7 +519,7 @@ def shift(self, n, freq): Parameters ---------- - n : int + n : int, float if freq of days or below Periods to shift by freq : str or datetime.timedelta A frequency string or datetime.timedelta object to shift by @@ -541,14 +541,15 @@ def shift(self, n, freq): >>> index.shift(1, "M") CFTimeIndex([2000-02-29 00:00:00], dtype='object', length=1, calendar='standard', freq=None) + >>> index.shift(1.5, "D") + CFTimeIndex([2000-02-01 12:00:00], + dtype='object', length=1, calendar='standard', freq=None) """ - from .cftime_offsets import to_offset - - if not isinstance(n, int): - raise TypeError(f"'n' must be an int, got {n}.") if isinstance(freq, timedelta): return self + n * freq elif isinstance(freq, str): + from .cftime_offsets import to_offset + return self + n * to_offset(freq) else: raise TypeError( diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 4f94b35e3c3..3879959675f 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -18,6 +18,7 @@ QuarterBegin, QuarterEnd, Second, + Tick, YearBegin, YearEnd, _days_in_month, @@ -54,11 +55,25 @@ def calendar(request): (YearEnd(), 1), (QuarterBegin(), 1), (QuarterEnd(), 1), + (Tick(), 1), + (Day(), 1), + (Hour(), 1), + (Minute(), 1), + (Second(), 1), + (Millisecond(), 1), + (Microsecond(), 1), (BaseCFTimeOffset(n=2), 2), (YearBegin(n=2), 2), (YearEnd(n=2), 2), (QuarterBegin(n=2), 2), (QuarterEnd(n=2), 2), + (Tick(n=2), 2), + (Day(n=2), 2), + (Hour(n=2), 2), + (Minute(n=2), 2), + (Second(n=2), 2), + (Millisecond(n=2), 2), + (Microsecond(n=2), 2), ], ids=_id_func, ) @@ -74,6 +89,15 @@ def test_cftime_offset_constructor_valid_n(offset, expected_n): (YearEnd, 1.5), (QuarterBegin, 1.5), (QuarterEnd, 1.5), + (MonthBegin, 1.5), + (MonthEnd, 1.5), + (Tick, 1.5), + (Day, 1.5), + (Hour, 1.5), + (Minute, 1.5), + (Second, 1.5), + (Millisecond, 1.5), + (Microsecond, 1.5), ], ids=_id_func, ) @@ -359,30 +383,64 @@ def test_eq(a, b): _MUL_TESTS = [ - (BaseCFTimeOffset(), BaseCFTimeOffset(n=3)), - (YearEnd(), YearEnd(n=3)), - (YearBegin(), YearBegin(n=3)), - (QuarterEnd(), QuarterEnd(n=3)), - (QuarterBegin(), QuarterBegin(n=3)), - (MonthEnd(), MonthEnd(n=3)), - (MonthBegin(), MonthBegin(n=3)), - (Day(), Day(n=3)), - (Hour(), Hour(n=3)), - (Minute(), Minute(n=3)), - (Second(), Second(n=3)), - (Millisecond(), Millisecond(n=3)), - (Microsecond(), Microsecond(n=3)), + (BaseCFTimeOffset(), 3, BaseCFTimeOffset(n=3)), + (YearEnd(), 3, YearEnd(n=3)), + (YearBegin(), 3, YearBegin(n=3)), + (QuarterEnd(), 3, QuarterEnd(n=3)), + (QuarterBegin(), 3, QuarterBegin(n=3)), + (MonthEnd(), 3, MonthEnd(n=3)), + (MonthBegin(), 3, MonthBegin(n=3)), + (Tick(), 3, Tick(n=3)), + (Day(), 3, Day(n=3)), + (Hour(), 3, Hour(n=3)), + (Minute(), 3, Minute(n=3)), + (Second(), 3, Second(n=3)), + (Millisecond(), 3, Millisecond(n=3)), + (Microsecond(), 3, Microsecond(n=3)), + (Day(), 0.5, Hour(n=12)), + (Hour(), 0.5, Minute(n=30)), + (Minute(), 0.5, Second(n=30)), + (Second(), 0.5, Millisecond(n=500)), + (Millisecond(), 0.5, Microsecond(n=500)), ] -@pytest.mark.parametrize(("offset", "expected"), _MUL_TESTS, ids=_id_func) -def test_mul(offset, expected): - assert offset * 3 == expected +@pytest.mark.parametrize(("offset", "multiple", "expected"), _MUL_TESTS, ids=_id_func) +def test_mul(offset, multiple, expected): + assert offset * multiple == expected -@pytest.mark.parametrize(("offset", "expected"), _MUL_TESTS, ids=_id_func) -def 
test_rmul(offset, expected): - assert 3 * offset == expected +@pytest.mark.parametrize(("offset", "multiple", "expected"), _MUL_TESTS, ids=_id_func) +def test_rmul(offset, multiple, expected): + assert multiple * offset == expected + + +def test_mul_float_multiple_next_higher_resolution(): + """Test more than one iteration through _next_higher_resolution is required.""" + assert 1e-6 * Second() == Microsecond() + assert 1e-6 / 60 * Minute() == Microsecond() + + +@pytest.mark.parametrize( + "offset", + [YearBegin(), YearEnd(), QuarterBegin(), QuarterEnd(), MonthBegin(), MonthEnd()], + ids=_id_func, +) +def test_nonTick_offset_multiplied_float_error(offset): + """Test that the appropriate error is raised if a non-Tick offset is + multiplied by a float.""" + with pytest.raises(TypeError, match="unsupported operand type"): + offset * 0.5 + + +def test_Microsecond_multiplied_float_error(): + """Test that the appropriate error is raised if a Tick offset is multiplied + by a float which causes it not to be representable by a + microsecond-precision timedelta.""" + with pytest.raises( + ValueError, match="Could not convert to integer offset at any resolution" + ): + Microsecond() * 0.5 @pytest.mark.parametrize( diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index c70fd53038b..2c6a0796c5f 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -755,7 +755,7 @@ def test_cftimeindex_add(index): @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_cftimeindex_add_timedeltaindex(calendar): +def test_cftimeindex_add_timedeltaindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) deltas = pd.TimedeltaIndex([timedelta(days=2) for _ in range(5)]) result = a + deltas @@ -764,6 +764,44 @@ def test_cftimeindex_add_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize("n", [2.0, 1.5]) +@pytest.mark.parametrize( + "freq,units", + [ + ("D", "D"), + ("H", "H"), + ("T", "min"), + ("S", "S"), + ("L", "ms"), + ], +) +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_shift_float(n, freq, units, calendar) -> None: + a = xr.cftime_range("2000", periods=3, calendar=calendar, freq="D") + result = a + pd.Timedelta(n, units) + expected = a.shift(n, freq) + assert result.equals(expected) + assert isinstance(result, CFTimeIndex) + + +@requires_cftime +def test_cftimeindex_shift_float_us() -> None: + a = xr.cftime_range("2000", periods=3, freq="D") + with pytest.raises( + ValueError, match="Could not convert to integer offset at any resolution" + ): + a.shift(2.5, "us") + + +@requires_cftime +@pytest.mark.parametrize("freq", ["AS", "A", "YS", "Y", "QS", "Q", "MS", "M"]) +def test_cftimeindex_shift_float_fails_for_non_tick_freqs(freq) -> None: + a = xr.cftime_range("2000", periods=3, freq="D") + with pytest.raises(TypeError, match="unsupported operand type"): + a.shift(2.5, freq) + + @requires_cftime def test_cftimeindex_radd(index): date_type = index.date_type @@ -781,7 +819,7 @@ def test_cftimeindex_radd(index): @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_timedeltaindex_add_cftimeindex(calendar): +def test_timedeltaindex_add_cftimeindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) deltas = pd.TimedeltaIndex([timedelta(days=2) for _ in range(5)]) result = deltas + a @@ -829,7 +867,7 @@ def test_cftimeindex_sub_timedelta_array(index, other): 
@requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_cftimeindex_sub_cftimeindex(calendar): +def test_cftimeindex_sub_cftimeindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) b = a.shift(2, "D") result = b - a @@ -868,7 +906,7 @@ def test_distant_cftime_datetime_sub_cftimeindex(calendar): @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_cftimeindex_sub_timedeltaindex(calendar): +def test_cftimeindex_sub_timedeltaindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) deltas = pd.TimedeltaIndex([timedelta(days=2) for _ in range(5)]) result = a - deltas @@ -904,7 +942,7 @@ def test_cftimeindex_rsub(index): @requires_cftime @pytest.mark.parametrize("freq", ["D", timedelta(days=1)]) -def test_cftimeindex_shift(index, freq): +def test_cftimeindex_shift(index, freq) -> None: date_type = index.date_type expected_dates = [ date_type(1, 1, 3), @@ -919,14 +957,14 @@ def test_cftimeindex_shift(index, freq): @requires_cftime -def test_cftimeindex_shift_invalid_n(): +def test_cftimeindex_shift_invalid_n() -> None: index = xr.cftime_range("2000", periods=3) with pytest.raises(TypeError): index.shift("a", "D") @requires_cftime -def test_cftimeindex_shift_invalid_freq(): +def test_cftimeindex_shift_invalid_freq() -> None: index = xr.cftime_range("2000", periods=3) with pytest.raises(TypeError): index.shift(1, 1) From 07456ed71b89704a438b17cb3bcf442f5f208d5a Mon Sep 17 00:00:00 2001 From: crusaderky Date: Wed, 16 Feb 2022 16:32:35 +0000 Subject: [PATCH 45/68] Remove xfail from tests decorated by @gen_cluster (#6282) --- xarray/tests/test_distributed.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index a6ea792b5ac..b97032014c4 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -5,14 +5,14 @@ import tempfile import pytest +from packaging.version import Version dask = pytest.importorskip("dask") # isort:skip distributed = pytest.importorskip("distributed") # isort:skip from dask.distributed import Client, Lock -from distributed.utils_test import cluster, gen_cluster -from distributed.utils_test import loop from distributed.client import futures_of +from distributed.utils_test import cluster, gen_cluster, loop import xarray as xr from xarray.backends.locks import HDF5_LOCK, CombinedLock @@ -208,7 +208,10 @@ def test_dask_distributed_cfgrib_integration_test(loop) -> None: assert_allclose(actual, expected) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/pull/6211") +@pytest.mark.xfail( + condition=Version(distributed.__version__) < Version("2022.02.0"), + reason="https://github.com/dask/distributed/pull/5739", +) @gen_cluster(client=True) async def test_async(c, s, a, b) -> None: x = create_test_data() @@ -241,7 +244,10 @@ def test_hdf5_lock() -> None: assert isinstance(HDF5_LOCK, dask.utils.SerializableLock) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/pull/6211") +@pytest.mark.xfail( + condition=Version(distributed.__version__) < Version("2022.02.0"), + reason="https://github.com/dask/distributed/pull/5739", +) @gen_cluster(client=True) async def test_serializable_locks(c, s, a, b) -> None: def f(x, lock=None): From dfaedb2773208c78ab93940ef4a1979238ee0f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Gli=C3=9F?= Date: Thu, 17 Feb 2022 13:51:48 +0100 Subject: [PATCH 46/68] Allow to parse more backend 
kwargs to pydap backend (#6276) * propose implementation of #6274 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor fix * Add tests for new method PydapDataStore._update_default_params * minor fix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add entry in whats-new.rst * Minor change * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Define open_dataset_params for cleaner solution * remove import of inspect lib * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update xarray/backends/pydap_.py Co-authored-by: Mathias Hauser * update whats-new * Set pydap backend arguments explicitly * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * set defaults in PydapDataStore.open rather than entry point * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: Jonas Gliss Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Mathias Hauser --- doc/whats-new.rst | 4 ++++ xarray/backends/pydap_.py | 47 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 88453a641e0..37abf931eb4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,10 @@ New Features and ``Microsecond``. (:issue:`6134`, :pull:`6135`). By `Aaron Spring `_. +- Enbable to provide more keyword arguments to `pydap` backend when reading + OpenDAP datasets (:issue:`6274`). + By `Jonas Gliß `. + Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index ffaf3793928..a5a1430abf2 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -86,9 +86,40 @@ def __init__(self, ds): self.ds = ds @classmethod - def open(cls, url, session=None): + def open( + cls, + url, + application=None, + session=None, + output_grid=None, + timeout=None, + verify=None, + user_charset=None, + ): + + if output_grid is None: + output_grid = True + + if verify is None: + verify = True + + if timeout is None: + from pydap.lib import DEFAULT_TIMEOUT - ds = pydap.client.open_url(url, session=session) + timeout = DEFAULT_TIMEOUT + + if user_charset is None: + user_charset = "ascii" + + ds = pydap.client.open_url( + url=url, + application=application, + session=session, + output_grid=output_grid, + timeout=timeout, + verify=verify, + user_charset=user_charset, + ) return cls(ds) def open_store_variable(self, var): @@ -123,12 +154,22 @@ def open_dataset( drop_variables=None, use_cftime=None, decode_timedelta=None, + application=None, session=None, + output_grid=None, + timeout=None, + verify=None, + user_charset=None, ): store = PydapDataStore.open( - filename_or_obj, + url=filename_or_obj, + application=application, session=session, + output_grid=output_grid, + timeout=timeout, + verify=verify, + user_charset=user_charset, ) store_entrypoint = StoreBackendEntrypoint() From 678bc68b85eb4ea14d9cb262e683e65070d8a31b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 18 Feb 2022 07:01:57 -0800 Subject: [PATCH 47/68] Add pytest-gha-annotations (#6271) This looks really good -- it adds an annotation to the PR inline for any failures. 
I'm not sure how it will do with our many environments -- we may have to find a way of only having it run on one environment if it doesn't de-dupe them. --- ci/requirements/environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 516c964afc7..9269a70badf 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -37,6 +37,7 @@ dependencies: - pytest - pytest-cov - pytest-env + - pytest-github-actions-annotate-failures - pytest-xdist - rasterio - scipy From 33fbb648ac042f821a11870ffa544e5bcb6e178f Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 18 Feb 2022 17:51:55 +0100 Subject: [PATCH 48/68] use `warnings.catch_warnings(record=True)` instead of `pytest.warns(None)` (#6251) * no longer use pytest.warns(None) * use 'assert_no_warnings' --- xarray/tests/__init__.py | 17 ++++++++++------- xarray/tests/test_backends.py | 13 ++++++------- xarray/tests/test_coding_times.py | 13 +++++-------- xarray/tests/test_dataarray.py | 4 ++-- xarray/tests/test_dataset.py | 6 +++--- xarray/tests/test_ufuncs.py | 5 ++--- xarray/tests/test_variable.py | 4 ++-- 7 files changed, 30 insertions(+), 32 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 20dfdaf5076..00fec07f793 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -1,7 +1,7 @@ import importlib import platform import warnings -from contextlib import contextmanager +from contextlib import contextmanager, nullcontext from unittest import mock # noqa: F401 import numpy as np @@ -113,15 +113,10 @@ def __call__(self, dsk, keys, **kwargs): return dask.get(dsk, keys, **kwargs) -@contextmanager -def dummy_context(): - yield None - - def raise_if_dask_computes(max_computes=0): # return a dummy context manager so that this can be used for non-dask objects if not has_dask: - return dummy_context() + return nullcontext() scheduler = CountingScheduler(max_computes) return dask.config.set(scheduler=scheduler) @@ -170,6 +165,14 @@ def source_ndarray(array): return base +@contextmanager +def assert_no_warnings(): + + with warnings.catch_warnings(record=True) as record: + yield record + assert len(record) == 0, "got unexpected warning(s)" + + # Internal versions of xarray's test functions that validate additional # invariants diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c0e340dd723..1d0342dd344 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,6 +54,7 @@ assert_array_equal, assert_equal, assert_identical, + assert_no_warnings, has_dask, has_h5netcdf_0_12, has_netCDF4, @@ -1814,12 +1815,11 @@ def test_warning_on_bad_chunks(self): good_chunks = ({"dim2": 3}, {"dim3": (6, 4)}, {}) for chunks in good_chunks: kwargs = {"chunks": chunks} - with pytest.warns(None) as record: + with assert_no_warnings(): with self.roundtrip(original, open_kwargs=kwargs) as actual: for k, v in actual.variables.items(): # only index variables should be in memory assert v._in_memory == (k in actual.dims) - assert len(record) == 0 @requires_dask def test_deprecate_auto_chunk(self): @@ -4986,10 +4986,9 @@ def test_dataarray_to_netcdf_no_name_pathlib(self): @requires_scipy_or_netCDF4 def test_no_warning_from_dask_effective_get(): with create_tmp_file() as tmpfile: - with pytest.warns(None) as record: + with assert_no_warnings(): ds = Dataset() ds.to_netcdf(tmpfile) - assert len(record) == 0 @requires_scipy_or_netCDF4 @@ -5031,7 +5030,7 @@ def 
test_use_cftime_standard_calendar_default_in_range(calendar): with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: with open_dataset(tmp_file) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) @@ -5094,7 +5093,7 @@ def test_use_cftime_true(calendar, units_year): with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: with open_dataset(tmp_file, use_cftime=True) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) @@ -5123,7 +5122,7 @@ def test_use_cftime_false_standard_calendar_in_range(calendar): with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: with open_dataset(tmp_file, use_cftime=False) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 2e19ddb3a75..92d27f22eb8 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -32,6 +32,7 @@ from . import ( arm_xfail, assert_array_equal, + assert_no_warnings, has_cftime, has_cftime_1_4_1, requires_cftime, @@ -905,10 +906,9 @@ def test_use_cftime_default_standard_calendar_in_range(calendar) -> None: units = "days since 2000-01-01" expected = pd.date_range("2000", periods=2) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar) np.testing.assert_array_equal(result, expected) - assert not record @requires_cftime @@ -942,10 +942,9 @@ def test_use_cftime_default_non_standard_calendar(calendar, units_year) -> None: numerical_dates, units, calendar, only_use_cftime_datetimes=True ) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar) np.testing.assert_array_equal(result, expected) - assert not record @requires_cftime @@ -960,10 +959,9 @@ def test_use_cftime_true(calendar, units_year) -> None: numerical_dates, units, calendar, only_use_cftime_datetimes=True ) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=True) np.testing.assert_array_equal(result, expected) - assert not record @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) @@ -972,10 +970,9 @@ def test_use_cftime_false_standard_calendar_in_range(calendar) -> None: units = "days since 2000-01-01" expected = pd.date_range("2000", periods=2) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) np.testing.assert_array_equal(result, expected) - assert not record @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b707ae2a063..8d73f9ec7ee 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -33,6 +33,7 @@ assert_chunks_equal, assert_equal, assert_identical, + assert_no_warnings, has_dask, raise_if_dask_computes, requires_bottleneck, @@ -6155,9 +6156,8 @@ def test_rolling_keep_attrs(funcname, argument): def test_raise_no_warning_for_nan_in_binary_ops(): - with pytest.warns(None) as record: + with assert_no_warnings(): 
xr.DataArray([1, 2, np.NaN]) > 0 - assert len(record) == 0 @pytest.mark.filterwarnings("error") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index fed886465ed..c4fa847e664 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -38,6 +38,7 @@ assert_array_equal, assert_equal, assert_identical, + assert_no_warnings, create_test_data, has_cftime, has_dask, @@ -1873,7 +1874,7 @@ def test_reindex_warning(self): # Should not warn ind = xr.DataArray([0.0, 1.0], dims=["dim2"], name="ind") - with pytest.warns(None) as ws: + with warnings.catch_warnings(record=True) as ws: data.reindex(dim2=ind) assert len(ws) == 0 @@ -6165,9 +6166,8 @@ def test_ndrolling_construct(center, fill_value, dask): def test_raise_no_warning_for_nan_in_binary_ops(): - with pytest.warns(None) as record: + with assert_no_warnings(): Dataset(data_vars={"x": ("y", [1, 2, np.NaN])}) > 0 - assert len(record) == 0 @pytest.mark.filterwarnings("error") diff --git a/xarray/tests/test_ufuncs.py b/xarray/tests/test_ufuncs.py index 3379fba44f8..590ae9ae003 100644 --- a/xarray/tests/test_ufuncs.py +++ b/xarray/tests/test_ufuncs.py @@ -8,7 +8,7 @@ from . import assert_array_equal from . import assert_identical as assert_identical_ -from . import mock +from . import assert_no_warnings, mock def assert_identical(a, b): @@ -164,9 +164,8 @@ def test_xarray_ufuncs_deprecation(): with pytest.warns(FutureWarning, match="xarray.ufuncs"): xu.cos(xr.DataArray([0, 1])) - with pytest.warns(None) as record: + with assert_no_warnings(): xu.angle(xr.DataArray([0, 1])) - assert len(record) == 0 @pytest.mark.filterwarnings("ignore::RuntimeWarning") diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 33fff62c304..a88d5a22c0d 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -33,6 +33,7 @@ assert_array_equal, assert_equal, assert_identical, + assert_no_warnings, raise_if_dask_computes, requires_cupy, requires_dask, @@ -2537,9 +2538,8 @@ def __init__(self, array): def test_raise_no_warning_for_nan_in_binary_ops(): - with pytest.warns(None) as record: + with assert_no_warnings(): Variable("x", [1, 2, np.NaN]) > 0 - assert len(record) == 0 class TestBackendIndexing: From d26894bdc3d80acd117a16a528663bcdf26bab32 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 22 Feb 2022 19:49:52 -0800 Subject: [PATCH 49/68] Move Zarr up in io.rst (#6289) * Move Zarr up in io.rst The existing version had it right down the page, below Iris / Pickle / et al. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/contributing.rst | 4 +- doc/user-guide/io.rst | 858 ++++++++++++++++++------------------- doc/whats-new.rst | 4 +- xarray/core/computation.py | 2 +- 4 files changed, 434 insertions(+), 434 deletions(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index df279caa54f..0913702fd83 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -274,13 +274,13 @@ Some other important things to know about the docs: .. 
ipython:: python x = 2 - x ** 3 + x**3 will be rendered as:: In [1]: x = 2 - In [2]: x ** 3 + In [2]: x**3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 28eeeeda99b..834e9ad2464 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -498,596 +498,596 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: Note that this produces a file that is likely to be not readable by other netCDF libraries! -.. _io.iris: +.. _io.zarr: -Iris +Zarr ---- -The Iris_ tool allows easy reading of common meteorological and climate model formats -(including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very -similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is -installed, xarray can convert a ``DataArray`` into a ``Cube`` using -:py:meth:`DataArray.to_iris`: - -.. ipython:: python +`Zarr`_ is a Python package that provides an implementation of chunked, compressed, +N-dimensional arrays. +Zarr has the ability to store arrays in a range of ways, including in memory, +in files, and in cloud-based object storage such as `Amazon S3`_ and +`Google Cloud Storage`_. +Xarray's Zarr backend allows xarray to leverage these capabilities, including +the ability to store and analyze datasets far too large fit onto disk +(particularly :ref:`in combination with dask `). - da = xr.DataArray( - np.random.rand(4, 5), - dims=["x", "y"], - coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), - ) +Xarray can't open just any zarr dataset, because xarray requires special +metadata (attributes) describing the dataset dimensions and coordinates. +At this time, xarray can only open zarr datasets that have been written by +xarray. For implementation details, see :ref:`zarr_encoding`. - cube = da.to_iris() - cube +To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method. -Conversely, we can create a new ``DataArray`` object from a ``Cube`` using -:py:meth:`DataArray.from_iris`: +To write to a local directory, we pass a path to a directory: .. ipython:: python + :suppress: - da_cube = xr.DataArray.from_iris(cube) - da_cube + ! rm -rf path/to/directory.zarr +.. ipython:: python -.. _Iris: https://scitools.org.uk/iris + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") +(The suffix ``.zarr`` is optional--just a reminder that a zarr store lives +there.) If the directory does not exist, it will be created. If a zarr +store is already present at that path, an error will be raised, preventing it +from being overwritten. To override this behavior and overwrite an existing +store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`. -OPeNDAP -------- +To store variable length strings, convert them to object arrays first with +``dtype=object``. -Xarray includes support for `OPeNDAP`__ (via the netCDF4 library or Pydap), which -lets us access large datasets over HTTP. +To read back a zarr dataset that has been created this way, we use the +:py:func:`open_zarr` method: -__ https://www.opendap.org/ +.. 
ipython:: python -For example, we can open a connection to GBs of weather data produced by the -`PRISM`__ project, and hosted by `IRI`__ at Columbia: + ds_zarr = xr.open_zarr("path/to/directory.zarr") + ds_zarr -__ https://www.prism.oregonstate.edu/ -__ https://iri.columbia.edu/ +Cloud Storage Buckets +~~~~~~~~~~~~~~~~~~~~~ -.. ipython source code for this section - we don't use this to avoid hitting the DAP server on every doc build. +It is possible to read and write xarray datasets directly from / to cloud +storage buckets using zarr. This example uses the `gcsfs`_ package to provide +an interface to `Google Cloud Storage`_. - remote_data = xr.open_dataset( - 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - decode_times=False) - tmax = remote_data.tmax[:500, ::3, ::3] - tmax +From v0.16.2: general `fsspec`_ URLs are parsed and the store set up for you +automatically when reading, such that you can open a dataset in a single +call. You should include any arguments to the storage backend as the +key ``storage_options``, part of ``backend_kwargs``. - @savefig opendap-prism-tmax.png - tmax[0].plot() +.. code:: python -.. ipython:: - :verbatim: + ds_gcs = xr.open_dataset( + "gcs:///path.zarr", + backend_kwargs={ + "storage_options": {"project": "", "token": None} + }, + engine="zarr", + ) - In [3]: remote_data = xr.open_dataset( - ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", - ...: decode_times=False, - ...: ) - In [4]: remote_data - Out[4]: - - Dimensions: (T: 1422, X: 1405, Y: 621) - Coordinates: - * X (X) float32 -125.0 -124.958 -124.917 -124.875 -124.833 -124.792 -124.75 ... - * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 -772.5 -771.5 ... - * Y (Y) float32 49.9167 49.875 49.8333 49.7917 49.75 49.7083 49.6667 49.625 ... - Data variables: - ppt (T, Y, X) float64 ... - tdmean (T, Y, X) float64 ... - tmax (T, Y, X) float64 ... - tmin (T, Y, X) float64 ... - Attributes: - Conventions: IRIDL - expires: 1375315200 +This also works with ``open_mfdataset``, allowing you to pass a list of paths or +a URL to be interpreted as a glob string. -.. TODO: update this example to show off decode_cf? +For older versions, and for writing, you must explicitly set up a ``MutableMapping`` +instance and pass this, as follows: -.. note:: +.. code:: python - Like many real-world datasets, this dataset does not entirely follow - `CF conventions`_. Unexpected formats will usually cause xarray's automatic - decoding to fail. The way to work around this is to either set - ``decode_cf=False`` in ``open_dataset`` to turn off all use of CF - conventions, or by only disabling the troublesome parser. - In this case, we set ``decode_times=False`` because the time axis here - provides the calendar attribute in a format that xarray does not expect - (the integer ``360`` instead of a string like ``'360_day'``). + import gcsfs -We can select and slice this data any number of times, and nothing is loaded -over the network until we look at particular values: + fs = gcsfs.GCSFileSystem(project="", token=None) + gcsmap = gcsfs.mapping.GCSMap("", gcs=fs, check=True, create=False) + # write to the bucket + ds.to_zarr(store=gcsmap) + # read it back + ds_gcs = xr.open_zarr(gcsmap) -.. ipython:: - :verbatim: +(or use the utility function ``fsspec.get_mapper()``). - In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] +.. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ +.. _Zarr: https://zarr.readthedocs.io/ +.. _Amazon S3: https://aws.amazon.com/s3/ +.. 
_Google Cloud Storage: https://cloud.google.com/storage/ +.. _gcsfs: https://github.com/fsspec/gcsfs - In [5]: tmax - Out[5]: - - [48541500 values with dtype=float64] - Coordinates: - * Y (Y) float32 49.9167 49.7917 49.6667 49.5417 49.4167 49.2917 ... - * X (X) float32 -125.0 -124.875 -124.75 -124.625 -124.5 -124.375 ... - * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 ... - Attributes: - pointwidth: 120 - standard_name: air_temperature - units: Celsius_scale - expires: 1443657600 +Zarr Compressors and Filters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # the data is downloaded automatically when we make the plot - In [6]: tmax[0].plot() +There are many different options for compression and filtering possible with +zarr. These are described in the +`zarr documentation `_. +These options can be passed to the ``to_zarr`` method as variable encoding. +For example: -.. image:: ../_static/opendap-prism-tmax.png +.. ipython:: python + :suppress: -Some servers require authentication before we can access the data. For this -purpose we can explicitly create a :py:class:`backends.PydapDataStore` -and pass in a `Requests`__ session object. For example for -HTTP Basic authentication:: + ! rm -rf foo.zarr - import xarray as xr - import requests +.. ipython:: python - session = requests.Session() - session.auth = ('username', 'password') + import zarr - store = xr.backends.PydapDataStore.open('http://example.com/data', - session=session) - ds = xr.open_dataset(store) + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) -`Pydap's cas module`__ has functions that generate custom sessions for -servers that use CAS single sign-on. For example, to connect to servers -that require NASA's URS authentication:: +.. note:: - import xarray as xr - from pydata.cas.urs import setup_session + Not all native zarr compression and filtering options have been tested with + xarray. - ds_url = 'https://gpm1.gesdisc.eosdis.nasa.gov/opendap/hyrax/example.nc' +.. _io.zarr.consolidated_metadata: - session = setup_session('username', 'password', check_url=ds_url) - store = xr.backends.PydapDataStore.open(ds_url, session=session) +Consolidated Metadata +~~~~~~~~~~~~~~~~~~~~~ - ds = xr.open_dataset(store) +Xarray needs to read all of the zarr metadata when it opens a dataset. +In some storage mediums, such as with cloud object storage (e.g. amazon S3), +this can introduce significant overhead, because two separate HTTP calls to the +object store must be made for each variable in the dataset. +As of xarray version 0.18, xarray by default uses a feature called +*consolidated metadata*, storing all metadata for the entire dataset with a +single key (by default called ``.zmetadata``). This typically drastically speeds +up opening the store. (For more information on this feature, consult the +`zarr docs `_.) -__ https://docs.python-requests.org -__ https://www.pydap.org/en/latest/client.html#authentication +By default, xarray writes consolidated metadata and attempts to read stores +with consolidated metadata, falling back to use non-consolidated metadata for +reads. Because this fall-back option is so much slower, xarray issues a +``RuntimeWarning`` with guidance when reading with consolidated metadata fails: -.. _io.pickle: + Failed to open Zarr store with consolidated metadata, falling back to try + reading non-consolidated metadata. This is typically much slower for + opening a dataset. To silence this warning, consider: -Pickle ------- + 1. 
Consolidating metadata in this existing store with + :py:func:`zarr.consolidate_metadata`. + 2. Explicitly setting ``consolidated=False``, to avoid trying to read + consolidate metadata. + 3. Explicitly setting ``consolidated=True``, to raise an error in this case + instead of falling back to try reading non-consolidated metadata. -The simplest way to serialize an xarray object is to use Python's built-in pickle -module: +.. _io.zarr.appending: -.. ipython:: python +Appending to existing Zarr stores +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - import pickle +Xarray supports several ways of incrementally writing variables to a Zarr +store. These options are useful for scenarios when it is infeasible or +undesirable to write your entire dataset at once. - # use the highest protocol (-1) because it is way faster than the default - # text based pickle format - pkl = pickle.dumps(ds, protocol=-1) +.. tip:: - pickle.loads(pkl) + If you can load all of your data into a single ``Dataset`` using dask, a + single call to ``to_zarr()`` will write all of your data in parallel. -Pickling is important because it doesn't require any external libraries -and lets you use xarray objects with Python modules like -:py:mod:`multiprocessing` or :ref:`Dask `. However, pickling is -**not recommended for long-term storage**. +.. warning:: -Restoring a pickle requires that the internal structure of the types for the -pickled data remain unchanged. Because the internal design of xarray is still -being refined, we make no guarantees (at this point) that objects pickled with -this version of xarray will work in future versions. - -.. note:: + Alignment of coordinates is currently not checked when modifying an + existing Zarr store. It is up to the user to ensure that coordinates are + consistent. - When pickling an object opened from a NetCDF file, the pickle file will - contain a reference to the file on disk. If you want to store the actual - array values, load it into memory first with :py:meth:`Dataset.load` - or :py:meth:`Dataset.compute`. +To add or overwrite entire variables, simply call :py:meth:`~Dataset.to_zarr` +with ``mode='a'`` on a Dataset containing the new variables, passing in an +existing Zarr store or path to a Zarr store. -.. _dictionary io: +To resize and then append values along an existing dimension in a store, set +``append_dim``. This is a good option if data always arives in a particular +order, e.g., for time-stepping a simulation: -Dictionary ----------- +.. ipython:: python + :suppress: -We can convert a ``Dataset`` (or a ``DataArray``) to a dict using -:py:meth:`Dataset.to_dict`: + ! rm -rf path/to/directory.zarr .. ipython:: python - d = ds.to_dict() - d + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") -We can create a new xarray object from a dict using -:py:meth:`Dataset.from_dict`: +Finally, you can use ``region`` to write to limited regions of existing arrays +in an existing Zarr store. This is a good option for writing data in parallel +from independent processes. 
+ +To scale this up to writing large datasets, the first step is creating an +initial Zarr store without writing all of its array data. This can be done by +first creating a ``Dataset`` with dummy values stored in :ref:`dask `, +and then calling ``to_zarr`` with ``compute=False`` to write only metadata +(including ``attrs``) to Zarr: .. ipython:: python + :suppress: - ds_dict = xr.Dataset.from_dict(d) - ds_dict + ! rm -rf path/to/directory.zarr -Dictionary support allows for flexible use of xarray objects. It doesn't -require external libraries and dicts can easily be pickled, or converted to -json, or geojson. All the values are converted to lists, so dicts might -be quite large. +.. ipython:: python -To export just the dataset schema without the data itself, use the -``data=False`` option: + import dask.array + + # The values of this dask array are entirely irrelevant; only the dtype, + # shape and chunks are used + dummies = dask.array.zeros(30, chunks=10) + ds = xr.Dataset({"foo": ("x", dummies)}) + path = "path/to/directory.zarr" + # Now we write the metadata without computing any array values + ds.to_zarr(path, compute=False) + +Now, a Zarr store with the correct variable shapes and attributes exists that +can be filled out by subsequent calls to ``to_zarr``. The ``region`` provides a +mapping from dimension names to Python ``slice`` objects indicating where the +data should be written (in index space, not coordinate space), e.g., .. ipython:: python - ds.to_dict(data=False) + # For convenience, we'll slice a single dataset, but in the real use-case + # we would create them separately possibly even from separate processes. + ds = xr.Dataset({"foo": ("x", np.arange(30))}) + ds.isel(x=slice(0, 10)).to_zarr(path, region={"x": slice(0, 10)}) + ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": slice(10, 20)}) + ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}) -This can be useful for generating indices of dataset contents to expose to -search indices or other automated data discovery tools. +Concurrent writes with ``region`` are safe as long as they modify distinct +chunks in the underlying Zarr arrays (or use an appropriate ``lock``). -.. ipython:: python - :suppress: +As a safety check to make it harder to inadvertently override existing values, +if you set ``region`` then *all* variables included in a Dataset must have +dimensions included in ``region``. Other variables (typically coordinates) +need to be explicitly dropped and/or written in a separate calls to ``to_zarr`` +with ``mode='a'``. - import os +.. _io.iris: - os.remove("saved_on_disk.nc") +Iris +---- -.. _io.rasterio: +The Iris_ tool allows easy reading of common meteorological and climate model formats +(including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very +similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is +installed, xarray can convert a ``DataArray`` into a ``Cube`` using +:py:meth:`DataArray.to_iris`: -Rasterio --------- +.. ipython:: python -GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if -rasterio is installed. Here is an example of how to use -:py:func:`open_rasterio` to read one of rasterio's `test files`_: + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) -.. deprecated:: 0.20.0 + cube = da.to_iris() + cube - Deprecated in favor of rioxarray. 
- For information about transitioning, see: - https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html +Conversely, we can create a new ``DataArray`` object from a ``Cube`` using +:py:meth:`DataArray.from_iris`: -.. ipython:: - :verbatim: +.. ipython:: python - In [7]: rio = xr.open_rasterio("RGB.byte.tif") + da_cube = xr.DataArray.from_iris(cube) + da_cube - In [8]: rio - Out[8]: - - [1703814 values with dtype=uint8] - Coordinates: - * band (band) int64 1 2 3 - * y (y) float64 2.827e+06 2.826e+06 2.826e+06 2.826e+06 2.826e+06 ... - * x (x) float64 1.021e+05 1.024e+05 1.027e+05 1.03e+05 1.033e+05 ... - Attributes: - res: (300.0379266750948, 300.041782729805) - transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805, 28... - is_tiled: 0 - crs: +init=epsg:32618 +.. _Iris: https://scitools.org.uk/iris -The ``x`` and ``y`` coordinates are generated out of the file's metadata -(``bounds``, ``width``, ``height``), and they can be understood as cartesian -coordinates defined in the file's projection provided by the ``crs`` attribute. -``crs`` is a PROJ4 string which can be parsed by e.g. `pyproj`_ or rasterio. -See :ref:`/examples/visualization_gallery.ipynb#Parsing-rasterio-geocoordinates` -for an example of how to convert these to longitudes and latitudes. +OPeNDAP +------- -Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other -GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF. -`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. +Xarray includes support for `OPeNDAP`__ (via the netCDF4 library or Pydap), which +lets us access large datasets over HTTP. -.. ipython:: - :verbatim: +__ https://www.opendap.org/ - In [1]: import rioxarray +For example, we can open a connection to GBs of weather data produced by the +`PRISM`__ project, and hosted by `IRI`__ at Columbia: - In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") +__ https://www.prism.oregonstate.edu/ +__ https://iri.columbia.edu/ - In [3]: rds - Out[3]: - - [1703814 values with dtype=uint8] - Coordinates: - * band (band) int64 1 2 3 - * y (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06 - * x (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05 - spatial_ref int64 0 - Attributes: - STATISTICS_MAXIMUM: 255 - STATISTICS_MEAN: 29.947726688477 - STATISTICS_MINIMUM: 0 - STATISTICS_STDDEV: 52.340921626611 - transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827... - _FillValue: 0.0 - scale_factor: 1.0 - add_offset: 0.0 - grid_mapping: spatial_ref +.. ipython source code for this section + we don't use this to avoid hitting the DAP server on every doc build. - In [4]: rds.rio.crs - Out[4]: CRS.from_epsg(32618) + remote_data = xr.open_dataset( + 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', + decode_times=False) + tmax = remote_data.tmax[:500, ::3, ::3] + tmax - In [5]: rds4326 = rds.rio.reproject("epsg:4326") + @savefig opendap-prism-tmax.png + tmax[0].plot() - In [6]: rds4326.rio.crs - Out[6]: CRS.from_epsg(4326) +.. ipython:: + :verbatim: - In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") + In [3]: remote_data = xr.open_dataset( + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) + In [4]: remote_data + Out[4]: + + Dimensions: (T: 1422, X: 1405, Y: 621) + Coordinates: + * X (X) float32 -125.0 -124.958 -124.917 -124.875 -124.833 -124.792 -124.75 ... 
+ * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 -772.5 -771.5 ... + * Y (Y) float32 49.9167 49.875 49.8333 49.7917 49.75 49.7083 49.6667 49.625 ... + Data variables: + ppt (T, Y, X) float64 ... + tdmean (T, Y, X) float64 ... + tmax (T, Y, X) float64 ... + tmin (T, Y, X) float64 ... + Attributes: + Conventions: IRIDL + expires: 1375315200 -.. _rasterio: https://rasterio.readthedocs.io/en/latest/ -.. _rioxarray: https://corteva.github.io/rioxarray/stable/ -.. _test files: https://github.com/rasterio/rasterio/blob/master/tests/data/RGB.byte.tif -.. _pyproj: https://github.com/pyproj4/pyproj +.. TODO: update this example to show off decode_cf? -.. _io.zarr: +.. note:: -Zarr ----- + Like many real-world datasets, this dataset does not entirely follow + `CF conventions`_. Unexpected formats will usually cause xarray's automatic + decoding to fail. The way to work around this is to either set + ``decode_cf=False`` in ``open_dataset`` to turn off all use of CF + conventions, or by only disabling the troublesome parser. + In this case, we set ``decode_times=False`` because the time axis here + provides the calendar attribute in a format that xarray does not expect + (the integer ``360`` instead of a string like ``'360_day'``). -`Zarr`_ is a Python package that provides an implementation of chunked, compressed, -N-dimensional arrays. -Zarr has the ability to store arrays in a range of ways, including in memory, -in files, and in cloud-based object storage such as `Amazon S3`_ and -`Google Cloud Storage`_. -Xarray's Zarr backend allows xarray to leverage these capabilities, including -the ability to store and analyze datasets far too large fit onto disk -(particularly :ref:`in combination with dask `). +We can select and slice this data any number of times, and nothing is loaded +over the network until we look at particular values: -Xarray can't open just any zarr dataset, because xarray requires special -metadata (attributes) describing the dataset dimensions and coordinates. -At this time, xarray can only open zarr datasets that have been written by -xarray. For implementation details, see :ref:`zarr_encoding`. +.. ipython:: + :verbatim: -To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method. + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] -To write to a local directory, we pass a path to a directory: + In [5]: tmax + Out[5]: + + [48541500 values with dtype=float64] + Coordinates: + * Y (Y) float32 49.9167 49.7917 49.6667 49.5417 49.4167 49.2917 ... + * X (X) float32 -125.0 -124.875 -124.75 -124.625 -124.5 -124.375 ... + * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 ... + Attributes: + pointwidth: 120 + standard_name: air_temperature + units: Celsius_scale + expires: 1443657600 -.. ipython:: python - :suppress: + # the data is downloaded automatically when we make the plot + In [6]: tmax[0].plot() - ! rm -rf path/to/directory.zarr +.. image:: ../_static/opendap-prism-tmax.png -.. ipython:: python +Some servers require authentication before we can access the data. For this +purpose we can explicitly create a :py:class:`backends.PydapDataStore` +and pass in a `Requests`__ session object. 
For example for +HTTP Basic authentication:: - ds = xr.Dataset( - {"foo": (("x", "y"), np.random.rand(4, 5))}, - coords={ - "x": [10, 20, 30, 40], - "y": pd.date_range("2000-01-01", periods=5), - "z": ("x", list("abcd")), - }, - ) - ds.to_zarr("path/to/directory.zarr") + import xarray as xr + import requests -(The suffix ``.zarr`` is optional--just a reminder that a zarr store lives -there.) If the directory does not exist, it will be created. If a zarr -store is already present at that path, an error will be raised, preventing it -from being overwritten. To override this behavior and overwrite an existing -store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`. + session = requests.Session() + session.auth = ('username', 'password') -To store variable length strings, convert them to object arrays first with -``dtype=object``. + store = xr.backends.PydapDataStore.open('http://example.com/data', + session=session) + ds = xr.open_dataset(store) -To read back a zarr dataset that has been created this way, we use the -:py:func:`open_zarr` method: +`Pydap's cas module`__ has functions that generate custom sessions for +servers that use CAS single sign-on. For example, to connect to servers +that require NASA's URS authentication:: -.. ipython:: python + import xarray as xr + from pydata.cas.urs import setup_session - ds_zarr = xr.open_zarr("path/to/directory.zarr") - ds_zarr + ds_url = 'https://gpm1.gesdisc.eosdis.nasa.gov/opendap/hyrax/example.nc' -Cloud Storage Buckets -~~~~~~~~~~~~~~~~~~~~~ + session = setup_session('username', 'password', check_url=ds_url) + store = xr.backends.PydapDataStore.open(ds_url, session=session) -It is possible to read and write xarray datasets directly from / to cloud -storage buckets using zarr. This example uses the `gcsfs`_ package to provide -an interface to `Google Cloud Storage`_. + ds = xr.open_dataset(store) -From v0.16.2: general `fsspec`_ URLs are parsed and the store set up for you -automatically when reading, such that you can open a dataset in a single -call. You should include any arguments to the storage backend as the -key ``storage_options``, part of ``backend_kwargs``. +__ https://docs.python-requests.org +__ https://www.pydap.org/en/latest/client.html#authentication -.. code:: python +.. _io.pickle: - ds_gcs = xr.open_dataset( - "gcs:///path.zarr", - backend_kwargs={ - "storage_options": {"project": "", "token": None} - }, - engine="zarr", - ) +Pickle +------ +The simplest way to serialize an xarray object is to use Python's built-in pickle +module: -This also works with ``open_mfdataset``, allowing you to pass a list of paths or -a URL to be interpreted as a glob string. +.. ipython:: python -For older versions, and for writing, you must explicitly set up a ``MutableMapping`` -instance and pass this, as follows: + import pickle -.. code:: python + # use the highest protocol (-1) because it is way faster than the default + # text based pickle format + pkl = pickle.dumps(ds, protocol=-1) - import gcsfs + pickle.loads(pkl) - fs = gcsfs.GCSFileSystem(project="", token=None) - gcsmap = gcsfs.mapping.GCSMap("", gcs=fs, check=True, create=False) - # write to the bucket - ds.to_zarr(store=gcsmap) - # read it back - ds_gcs = xr.open_zarr(gcsmap) +Pickling is important because it doesn't require any external libraries +and lets you use xarray objects with Python modules like +:py:mod:`multiprocessing` or :ref:`Dask `. However, pickling is +**not recommended for long-term storage**. -(or use the utility function ``fsspec.get_mapper()``). 
+Restoring a pickle requires that the internal structure of the types for the +pickled data remain unchanged. Because the internal design of xarray is still +being refined, we make no guarantees (at this point) that objects pickled with +this version of xarray will work in future versions. -.. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ -.. _Zarr: https://zarr.readthedocs.io/ -.. _Amazon S3: https://aws.amazon.com/s3/ -.. _Google Cloud Storage: https://cloud.google.com/storage/ -.. _gcsfs: https://github.com/fsspec/gcsfs +.. note:: -Zarr Compressors and Filters -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + When pickling an object opened from a NetCDF file, the pickle file will + contain a reference to the file on disk. If you want to store the actual + array values, load it into memory first with :py:meth:`Dataset.load` + or :py:meth:`Dataset.compute`. -There are many different options for compression and filtering possible with -zarr. These are described in the -`zarr documentation `_. -These options can be passed to the ``to_zarr`` method as variable encoding. -For example: +.. _dictionary io: -.. ipython:: python - :suppress: +Dictionary +---------- - ! rm -rf foo.zarr +We can convert a ``Dataset`` (or a ``DataArray``) to a dict using +:py:meth:`Dataset.to_dict`: .. ipython:: python - import zarr - - compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) - ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) + d = ds.to_dict() + d -.. note:: +We can create a new xarray object from a dict using +:py:meth:`Dataset.from_dict`: - Not all native zarr compression and filtering options have been tested with - xarray. +.. ipython:: python -.. _io.zarr.consolidated_metadata: + ds_dict = xr.Dataset.from_dict(d) + ds_dict -Consolidated Metadata -~~~~~~~~~~~~~~~~~~~~~ +Dictionary support allows for flexible use of xarray objects. It doesn't +require external libraries and dicts can easily be pickled, or converted to +json, or geojson. All the values are converted to lists, so dicts might +be quite large. -Xarray needs to read all of the zarr metadata when it opens a dataset. -In some storage mediums, such as with cloud object storage (e.g. amazon S3), -this can introduce significant overhead, because two separate HTTP calls to the -object store must be made for each variable in the dataset. -As of xarray version 0.18, xarray by default uses a feature called -*consolidated metadata*, storing all metadata for the entire dataset with a -single key (by default called ``.zmetadata``). This typically drastically speeds -up opening the store. (For more information on this feature, consult the -`zarr docs `_.) +To export just the dataset schema without the data itself, use the +``data=False`` option: -By default, xarray writes consolidated metadata and attempts to read stores -with consolidated metadata, falling back to use non-consolidated metadata for -reads. Because this fall-back option is so much slower, xarray issues a -``RuntimeWarning`` with guidance when reading with consolidated metadata fails: +.. ipython:: python - Failed to open Zarr store with consolidated metadata, falling back to try - reading non-consolidated metadata. This is typically much slower for - opening a dataset. To silence this warning, consider: + ds.to_dict(data=False) - 1. Consolidating metadata in this existing store with - :py:func:`zarr.consolidate_metadata`. - 2. Explicitly setting ``consolidated=False``, to avoid trying to read - consolidate metadata. - 3. 
Explicitly setting ``consolidated=True``, to raise an error in this case - instead of falling back to try reading non-consolidated metadata. +This can be useful for generating indices of dataset contents to expose to +search indices or other automated data discovery tools. -.. _io.zarr.appending: +.. ipython:: python + :suppress: -Appending to existing Zarr stores -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + import os -Xarray supports several ways of incrementally writing variables to a Zarr -store. These options are useful for scenarios when it is infeasible or -undesirable to write your entire dataset at once. + os.remove("saved_on_disk.nc") -.. tip:: +.. _io.rasterio: - If you can load all of your data into a single ``Dataset`` using dask, a - single call to ``to_zarr()`` will write all of your data in parallel. +Rasterio +-------- -.. warning:: +GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if +rasterio is installed. Here is an example of how to use +:py:func:`open_rasterio` to read one of rasterio's `test files`_: - Alignment of coordinates is currently not checked when modifying an - existing Zarr store. It is up to the user to ensure that coordinates are - consistent. +.. deprecated:: 0.20.0 -To add or overwrite entire variables, simply call :py:meth:`~Dataset.to_zarr` -with ``mode='a'`` on a Dataset containing the new variables, passing in an -existing Zarr store or path to a Zarr store. + Deprecated in favor of rioxarray. + For information about transitioning, see: + https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html -To resize and then append values along an existing dimension in a store, set -``append_dim``. This is a good option if data always arives in a particular -order, e.g., for time-stepping a simulation: +.. ipython:: + :verbatim: -.. ipython:: python - :suppress: + In [7]: rio = xr.open_rasterio("RGB.byte.tif") - ! rm -rf path/to/directory.zarr + In [8]: rio + Out[8]: + + [1703814 values with dtype=uint8] + Coordinates: + * band (band) int64 1 2 3 + * y (y) float64 2.827e+06 2.826e+06 2.826e+06 2.826e+06 2.826e+06 ... + * x (x) float64 1.021e+05 1.024e+05 1.027e+05 1.03e+05 1.033e+05 ... + Attributes: + res: (300.0379266750948, 300.041782729805) + transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805, 28... + is_tiled: 0 + crs: +init=epsg:32618 -.. ipython:: python - ds1 = xr.Dataset( - {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, - coords={ - "x": [10, 20, 30, 40], - "y": [1, 2, 3, 4, 5], - "t": pd.date_range("2001-01-01", periods=2), - }, - ) - ds1.to_zarr("path/to/directory.zarr") - ds2 = xr.Dataset( - {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, - coords={ - "x": [10, 20, 30, 40], - "y": [1, 2, 3, 4, 5], - "t": pd.date_range("2001-01-03", periods=2), - }, - ) - ds2.to_zarr("path/to/directory.zarr", append_dim="t") +The ``x`` and ``y`` coordinates are generated out of the file's metadata +(``bounds``, ``width``, ``height``), and they can be understood as cartesian +coordinates defined in the file's projection provided by the ``crs`` attribute. +``crs`` is a PROJ4 string which can be parsed by e.g. `pyproj`_ or rasterio. +See :ref:`/examples/visualization_gallery.ipynb#Parsing-rasterio-geocoordinates` +for an example of how to convert these to longitudes and latitudes. -Finally, you can use ``region`` to write to limited regions of existing arrays -in an existing Zarr store. This is a good option for writing data in parallel -from independent processes. 
-To scale this up to writing large datasets, the first step is creating an -initial Zarr store without writing all of its array data. This can be done by -first creating a ``Dataset`` with dummy values stored in :ref:`dask `, -and then calling ``to_zarr`` with ``compute=False`` to write only metadata -(including ``attrs``) to Zarr: +Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other +GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF. +`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. -.. ipython:: python - :suppress: +.. ipython:: + :verbatim: - ! rm -rf path/to/directory.zarr + In [1]: import rioxarray -.. ipython:: python + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") - import dask.array + In [3]: rds + Out[3]: + + [1703814 values with dtype=uint8] + Coordinates: + * band (band) int64 1 2 3 + * y (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06 + * x (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05 + spatial_ref int64 0 + Attributes: + STATISTICS_MAXIMUM: 255 + STATISTICS_MEAN: 29.947726688477 + STATISTICS_MINIMUM: 0 + STATISTICS_STDDEV: 52.340921626611 + transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827... + _FillValue: 0.0 + scale_factor: 1.0 + add_offset: 0.0 + grid_mapping: spatial_ref - # The values of this dask array are entirely irrelevant; only the dtype, - # shape and chunks are used - dummies = dask.array.zeros(30, chunks=10) - ds = xr.Dataset({"foo": ("x", dummies)}) - path = "path/to/directory.zarr" - # Now we write the metadata without computing any array values - ds.to_zarr(path, compute=False) + In [4]: rds.rio.crs + Out[4]: CRS.from_epsg(32618) -Now, a Zarr store with the correct variable shapes and attributes exists that -can be filled out by subsequent calls to ``to_zarr``. The ``region`` provides a -mapping from dimension names to Python ``slice`` objects indicating where the -data should be written (in index space, not coordinate space), e.g., + In [5]: rds4326 = rds.rio.reproject("epsg:4326") -.. ipython:: python + In [6]: rds4326.rio.crs + Out[6]: CRS.from_epsg(4326) - # For convenience, we'll slice a single dataset, but in the real use-case - # we would create them separately possibly even from separate processes. - ds = xr.Dataset({"foo": ("x", np.arange(30))}) - ds.isel(x=slice(0, 10)).to_zarr(path, region={"x": slice(0, 10)}) - ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": slice(10, 20)}) - ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}) + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") -Concurrent writes with ``region`` are safe as long as they modify distinct -chunks in the underlying Zarr arrays (or use an appropriate ``lock``). -As a safety check to make it harder to inadvertently override existing values, -if you set ``region`` then *all* variables included in a Dataset must have -dimensions included in ``region``. Other variables (typically coordinates) -need to be explicitly dropped and/or written in a separate calls to ``to_zarr`` -with ``mode='a'``. +.. _rasterio: https://rasterio.readthedocs.io/en/latest/ +.. _rioxarray: https://corteva.github.io/rioxarray/stable/ +.. _test files: https://github.com/rasterio/rasterio/blob/master/tests/data/RGB.byte.tif +.. _pyproj: https://github.com/pyproj4/pyproj .. 
_io.cfgrib: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 37abf931eb4..aa48bd619e8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -5133,7 +5133,7 @@ Enhancements .. ipython:: python ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) - ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds["distance"] = np.sqrt(ds.x**2 + ds.y**2) @savefig where_example.png width=4in height=4in ds.distance.where(ds.distance < 100).plot() @@ -5341,7 +5341,7 @@ Enhancements .. ipython:: python ds = xray.Dataset({"y": ("x", [1, 2, 3])}) - ds.assign(z=lambda ds: ds.y ** 2) + ds.assign(z=lambda ds: ds.y**2) ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 88eefbdc441..c11bd1a78a4 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -944,7 +944,7 @@ def apply_ufunc( Calculate the vector magnitude of two arguments: >>> def magnitude(a, b): - ... func = lambda x, y: np.sqrt(x ** 2 + y ** 2) + ... func = lambda x, y: np.sqrt(x**2 + y**2) ... return xr.apply_ufunc(func, a, b) ... From b86a7c1a5614e7333267cdaab2e9f05e42e2cc0f Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 22 Feb 2022 19:50:04 -0800 Subject: [PATCH 50/68] Align language def in bugreport.yml with schema (#6290) * Align language def in bugreport.yml with schema Not sure if this matters at all, but VSCode's linter was complaining * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/bugreport.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bugreport.yml b/.github/ISSUE_TEMPLATE/bugreport.yml index bb1febae2ff..043584f3ea6 100644 --- a/.github/ISSUE_TEMPLATE/bugreport.yml +++ b/.github/ISSUE_TEMPLATE/bugreport.yml @@ -33,14 +33,14 @@ body: Bug reports that follow these guidelines are easier to diagnose, and so are often handled much more quickly. This will be automatically formatted into code, so no need for markdown backticks. - render: python + render: Python - type: textarea id: log-output attributes: label: Relevant log output description: Please copy and paste any relevant output. This will be automatically formatted into code, so no need for markdown backticks. - render: python + render: Python - type: textarea id: extra From b760807646cce493cf8f4f619fcda9a14e84670f Mon Sep 17 00:00:00 2001 From: Lukas Pilz Date: Wed, 23 Feb 2022 10:51:26 +0100 Subject: [PATCH 51/68] Amended docstring to reflect the actual behaviour of Dataset.map (#6232) * Amended docstring to reflect the actual behaviour of Dataset.map * Update --- xarray/core/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index af59f5cd2f1..fb30cf22e04 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5116,9 +5116,9 @@ def map( to transform each DataArray `x` in this dataset into another DataArray. keep_attrs : bool, optional - If True, the dataset's attributes (`attrs`) will be copied from - the original object to the new one. If False, the new object will - be returned without attributes. + If True, both the dataset's and variables' attributes (`attrs`) will be + copied from the original objects to the new ones. 
If False, the new dataset + and variables will be returned without copying the attributes. args : tuple, optional Positional arguments passed on to `func`. **kwargs : Any From 964bee8ba4f84794be41e279bdce62931b669269 Mon Sep 17 00:00:00 2001 From: Romain Caneill Date: Wed, 23 Feb 2022 18:54:28 +0100 Subject: [PATCH 52/68] Adding the new wrapper gsw-xarray (#6294) --- doc/ecosystem.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst index 469f83d37c1..2b49b1529e1 100644 --- a/doc/ecosystem.rst +++ b/doc/ecosystem.rst @@ -17,6 +17,7 @@ Geosciences - `climpred `_: Analysis of ensemble forecast models for climate prediction. - `geocube `_: Tool to convert geopandas vector data into rasterized xarray data. - `GeoWombat `_: Utilities for analysis of remotely sensed and gridded raster data at scale (easily tame Landsat, Sentinel, Quickbird, and PlanetScope). +- `gsw-xarray `_: a wrapper around `gsw `_ that adds CF compliant attributes when possible, units, name. - `infinite-diff `_: xarray-based finite-differencing, focused on gridded climate/meteorology data - `marc_analysis `_: Analysis package for CESM/MARC experiments and output. - `MetPy `_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data. From de965f342e1c9c5de92ab135fbc4062e21e72453 Mon Sep 17 00:00:00 2001 From: Lukas Pilz Date: Wed, 23 Feb 2022 18:54:46 +0100 Subject: [PATCH 53/68] Amended docs on how to add a new backend (#6292) Co-authored-by: Lukas Pilz --- doc/internals/how-to-add-new-backend.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index ceb59c8a3bd..4e4ae4e2e14 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -273,7 +273,7 @@ If you are using `Poetry `_ for your build system, y .. code-block:: toml - [tool.poetry.plugins."xarray_backends"] + [tool.poetry.plugins."xarray.backends"] "my_engine" = "my_package.my_module:MyBackendEntryClass" See https://python-poetry.org/docs/pyproject/#plugins for more information on Poetry plugins. 
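The corrected ``"xarray.backends"`` entry-point group above also applies when a backend is packaged with setuptools rather than Poetry. Below is a minimal sketch, assuming the same hypothetical names used in the documentation being patched (``my_package``, ``my_package.my_module``, ``MyBackendEntryClass``), of the equivalent declaration in a ``setup.py``:

.. code-block:: python

   # Minimal setup.py sketch. The package, module, and class names are the
   # hypothetical ones from the how-to-add-new-backend documentation; the key
   # point is the "xarray.backends" entry-point group, under which xarray
   # discovers third-party engines.
   from setuptools import setup

   setup(
       name="my_package",
       packages=["my_package"],
       entry_points={
           "xarray.backends": [
               "my_engine = my_package.my_module:MyBackendEntryClass",
           ],
       },
   )

Once such a package is installed, xarray should discover the plugin automatically, and users can select it with ``xr.open_dataset(..., engine="my_engine")``.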
From 17acbb027326ac5f2379fc6cabf425459759e0ca Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Fri, 25 Feb 2022 16:08:30 -0500 Subject: [PATCH 54/68] Drop duplicates over multiple dims, and add Dataset.drop_duplicates (#6307) * tests for da.drop_duplicates over multiple dims * pass tests * test for Dataset.drop_duplicates * piped both paths through dataset.drop_duplicates * added dataset.drop_duplicates to API docs * whats-new entry * correct small bug when raising error --- doc/api.rst | 1 + doc/whats-new.rst | 4 ++- xarray/core/dataarray.py | 17 ++++++---- xarray/core/dataset.py | 39 ++++++++++++++++++++++ xarray/tests/test_dataarray.py | 59 ++++++++++++++++++++++++---------- xarray/tests/test_dataset.py | 31 ++++++++++++++++++ 6 files changed, 126 insertions(+), 25 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index b552bc6b4d2..d2c222da4db 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -106,6 +106,7 @@ Dataset contents Dataset.swap_dims Dataset.expand_dims Dataset.drop_vars + Dataset.drop_duplicates Dataset.drop_dims Dataset.set_coords Dataset.reset_coords diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aa48bd619e8..24a8042ee66 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,7 +31,9 @@ New Features - Enbable to provide more keyword arguments to `pydap` backend when reading OpenDAP datasets (:issue:`6274`). By `Jonas Gliß `. - +- Allow :py:meth:`DataArray.drop_duplicates` to drop duplicates along multiple dimensions at once, + and add :py:meth:`Dataset.drop_duplicates`. (:pull:`6307`) + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 20e829d293e..b3c45d65818 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4659,14 +4659,15 @@ def curvefit( def drop_duplicates( self, - dim: Hashable, - keep: (str | bool) = "first", + dim: Hashable | Iterable[Hashable] | ..., + keep: Literal["first", "last"] | Literal[False] = "first", ): """Returns a new DataArray with duplicate dimension values removed. Parameters ---------- - dim : dimension label, optional + dim : dimension label or labels + Pass `...` to drop duplicates along all dimensions. keep : {"first", "last", False}, default: "first" Determines which duplicates (if any) to keep. - ``"first"`` : Drop duplicates except for the first occurrence. @@ -4676,11 +4677,13 @@ def drop_duplicates( Returns ------- DataArray + + See Also + -------- + Dataset.drop_duplicates """ - if dim not in self.dims: - raise ValueError(f"'{dim}' not found in dimensions") - indexes = {dim: ~self.get_index(dim).duplicated(keep=keep)} - return self.isel(indexes) + deduplicated = self._to_temp_dataset().drop_duplicates(dim, keep=keep) + return self._from_temp_dataset(deduplicated) def convert_calendar( self, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fb30cf22e04..be9df9d2e2d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7770,6 +7770,45 @@ def _wrapper(Y, *coords_, **kwargs): return result + def drop_duplicates( + self, + dim: Hashable | Iterable[Hashable] | ..., + keep: Literal["first", "last"] | Literal[False] = "first", + ): + """Returns a new Dataset with duplicate dimension values removed. + + Parameters + ---------- + dim : dimension label or labels + Pass `...` to drop duplicates along all dimensions. + keep : {"first", "last", False}, default: "first" + Determines which duplicates (if any) to keep. 
+ - ``"first"`` : Drop duplicates except for the first occurrence. + - ``"last"`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. + + Returns + ------- + Dataset + + See Also + -------- + DataArray.drop_duplicates + """ + if isinstance(dim, str): + dims = (dim,) + elif dim is ...: + dims = self.dims + else: + dims = dim + + missing_dims = set(dims) - set(self.dims) + if missing_dims: + raise ValueError(f"'{missing_dims}' not found in dimensions") + + indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims} + return self.isel(indexes) + def convert_calendar( self, calendar: str, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8d73f9ec7ee..fc82c03c5d9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6618,25 +6618,50 @@ def test_clip(da): result = da.clip(min=da.mean("x"), max=da.mean("a").isel(x=[0, 1])) -@pytest.mark.parametrize("keep", ["first", "last", False]) -def test_drop_duplicates(keep): - ds = xr.DataArray( - [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" - ) +class TestDropDuplicates: + @pytest.mark.parametrize("keep", ["first", "last", False]) + def test_drop_duplicates_1d(self, keep): + da = xr.DataArray( + [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" + ) - if keep == "first": - data = [0, 6, 7] - time = [0, 1, 2] - elif keep == "last": - data = [5, 6, 7] - time = [0, 1, 2] - else: - data = [6, 7] - time = [1, 2] + if keep == "first": + data = [0, 6, 7] + time = [0, 1, 2] + elif keep == "last": + data = [5, 6, 7] + time = [0, 1, 2] + else: + data = [6, 7] + time = [1, 2] + + expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") + result = da.drop_duplicates("time", keep=keep) + assert_equal(expected, result) + + with pytest.raises(ValueError, match="['space'] not found"): + da.drop_duplicates("space", keep=keep) + + def test_drop_duplicates_2d(self): + da = xr.DataArray( + [[0, 5, 6, 7], [2, 1, 3, 4]], + dims=["space", "time"], + coords={"space": [10, 10], "time": [0, 0, 1, 2]}, + name="test", + ) + + expected = xr.DataArray( + [[0, 6, 7]], + dims=["space", "time"], + coords={"time": ("time", [0, 1, 2]), "space": ("space", [10])}, + name="test", + ) + + result = da.drop_duplicates(["time", "space"], keep="first") + assert_equal(expected, result) - expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") - result = ds.drop_duplicates("time", keep=keep) - assert_equal(expected, result) + result = da.drop_duplicates(..., keep="first") + assert_equal(expected, result) class TestNumpyCoercion: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c4fa847e664..7ff75fb791b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6546,6 +6546,37 @@ def test_clip(ds): assert result.dims == ds.dims +class TestDropDuplicates: + @pytest.mark.parametrize("keep", ["first", "last", False]) + def test_drop_duplicates_1d(self, keep): + ds = xr.Dataset( + {"a": ("time", [0, 5, 6, 7]), "b": ("time", [9, 3, 8, 2])}, + coords={"time": [0, 0, 1, 2]}, + ) + + if keep == "first": + a = [0, 6, 7] + b = [9, 8, 2] + time = [0, 1, 2] + elif keep == "last": + a = [5, 6, 7] + b = [3, 8, 2] + time = [0, 1, 2] + else: + a = [6, 7] + b = [8, 2] + time = [1, 2] + + expected = xr.Dataset( + {"a": ("time", a), "b": ("time", b)}, coords={"time": time} + ) + result = ds.drop_duplicates("time", keep=keep) + assert_equal(expected, result) + + with 
pytest.raises(ValueError, match="['space'] not found"): + ds.drop_duplicates("space", keep=keep) + + class TestNumpyCoercion: def test_from_numpy(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) From 4292bdebd7c9c461b0814605509e90453fe47754 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 28 Feb 2022 10:11:01 +0100 Subject: [PATCH 55/68] from_dict: doctest (#6302) --- xarray/core/dataarray.py | 47 ++++++++++++++++------------ xarray/core/dataset.py | 67 +++++++++++++++++++++++++--------------- 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b3c45d65818..3d720f7fc8b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2873,25 +2873,7 @@ def to_dict(self, data: bool = True) -> dict: @classmethod def from_dict(cls, d: dict) -> DataArray: - """ - Convert a dictionary into an xarray.DataArray - - Input dict can take several forms: - - .. code:: python - - d = {"dims": "t", "data": x} - - d = { - "coords": {"t": {"dims": "t", "data": t, "attrs": {"units": "s"}}}, - "attrs": {"title": "air temperature"}, - "dims": "t", - "data": x, - "name": "a", - } - - where "t" is the name of the dimension, "a" is the name of the array, - and x and t are lists, numpy.arrays, or pandas objects. + """Convert a dictionary into an xarray.DataArray Parameters ---------- @@ -2906,6 +2888,33 @@ def from_dict(cls, d: dict) -> DataArray: -------- DataArray.to_dict Dataset.from_dict + + Examples + -------- + >>> d = {"dims": "t", "data": [1, 2, 3]} + >>> da = xr.DataArray.from_dict(d) + >>> da + + array([1, 2, 3]) + Dimensions without coordinates: t + + >>> d = { + ... "coords": { + ... "t": {"dims": "t", "data": [0, 1, 2], "attrs": {"units": "s"}} + ... }, + ... "attrs": {"title": "air temperature"}, + ... "dims": "t", + ... "data": [10, 20, 30], + ... "name": "a", + ... } + >>> da = xr.DataArray.from_dict(d) + >>> da + + array([10, 20, 30]) + Coordinates: + * t (t) int64 0 1 2 + Attributes: + title: air temperature """ coords = None if "coords" in d: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index be9df9d2e2d..90684c4db87 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5641,31 +5641,7 @@ def to_dict(self, data=True): @classmethod def from_dict(cls, d): - """ - Convert a dictionary into an xarray.Dataset. - - Input dict can take several forms: - - .. code:: python - - d = { - "t": {"dims": ("t"), "data": t}, - "a": {"dims": ("t"), "data": x}, - "b": {"dims": ("t"), "data": y}, - } - - d = { - "coords": {"t": {"dims": "t", "data": t, "attrs": {"units": "s"}}}, - "attrs": {"title": "air temperature"}, - "dims": "t", - "data_vars": { - "a": {"dims": "t", "data": x}, - "b": {"dims": "t", "data": y}, - }, - } - - where "t" is the name of the dimesion, "a" and "b" are names of data - variables and t, x, and y are lists, numpy.arrays or pandas objects. + """Convert a dictionary into an xarray.Dataset. Parameters ---------- @@ -5682,6 +5658,47 @@ def from_dict(cls, d): -------- Dataset.to_dict DataArray.from_dict + + Examples + -------- + >>> d = { + ... "t": {"dims": ("t"), "data": [0, 1, 2]}, + ... "a": {"dims": ("t"), "data": ["a", "b", "c"]}, + ... "b": {"dims": ("t"), "data": [10, 20, 30]}, + ... } + >>> ds = xr.Dataset.from_dict(d) + >>> ds + + Dimensions: (t: 3) + Coordinates: + * t (t) int64 0 1 2 + Data variables: + a (t) >> d = { + ... "coords": { + ... "t": {"dims": "t", "data": [0, 1, 2], "attrs": {"units": "s"}} + ... }, + ... 
"attrs": {"title": "air temperature"}, + ... "dims": "t", + ... "data_vars": { + ... "a": {"dims": "t", "data": [10, 20, 30]}, + ... "b": {"dims": "t", "data": ["a", "b", "c"]}, + ... }, + ... } + >>> ds = xr.Dataset.from_dict(d) + >>> ds + + Dimensions: (t: 3) + Coordinates: + * t (t) int64 0 1 2 + Data variables: + a (t) int64 10 20 30 + b (t) Date: Mon, 28 Feb 2022 04:53:21 -0500 Subject: [PATCH 56/68] On Windows, enable successful test of opening a dataset containing a cftime index (#6305) * Exit cluster context before deleting temporary directory Previously, on Windows, the scheduler in the outer context prevented deleting the temporary directory upon exiting the inner context of the latter. That caused the test to fail and the temporary directory and file to remain. * Use fixture instead of context manager for temporary directory * Edit whats-new entry --- doc/whats-new.rst | 2 +- xarray/tests/test_distributed.py | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 24a8042ee66..4729b74640f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,7 +52,7 @@ Bug fixes - Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size can now be stored using `to_zarr()` (:pull:`6258`) By `Tobias Kölling `_. -- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`). By `Martin Bergemann `_. +- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann `_ and `Stan West `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index b97032014c4..773733b7b89 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -1,8 +1,6 @@ """ isort:skip_file """ -import os import pickle import numpy as np -import tempfile import pytest from packaging.version import Version @@ -113,19 +111,18 @@ def test_dask_distributed_netcdf_roundtrip( @requires_cftime @requires_netCDF4 -def test_open_mfdataset_can_open_files_with_cftime_index(): +def test_open_mfdataset_can_open_files_with_cftime_index(tmp_path): T = xr.cftime_range("20010101", "20010501", calendar="360_day") Lon = np.arange(100) data = np.random.random((T.size, Lon.size)) da = xr.DataArray(data, coords={"time": T, "Lon": Lon}, name="test") + file_path = tmp_path / "test.nc" + da.to_netcdf(file_path) with cluster() as (s, [a, b]): with Client(s["address"]): - with tempfile.TemporaryDirectory() as td: - data_file = os.path.join(td, "test.nc") - da.to_netcdf(data_file) - for parallel in (False, True): - with xr.open_mfdataset(data_file, parallel=parallel) as tf: - assert_identical(tf["test"], da) + for parallel in (False, True): + with xr.open_mfdataset(file_path, parallel=parallel) as tf: + assert_identical(tf["test"], da) @pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS) From 555a70e290d8a13deb178da64d1d994a10f6acaf Mon Sep 17 00:00:00 2001 From: Stijn Van Hoey Date: Tue, 1 Mar 2022 16:01:39 +0100 Subject: [PATCH 57/68] Fix class attributes versus init parameters (#6312) --- doc/internals/how-to-add-new-backend.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index 4e4ae4e2e14..940fa7a4d1e 100644 --- 
a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -338,8 +338,8 @@ This is an example ``BackendArray`` subclass implementation: # other backend specific keyword arguments ): self.shape = shape - self.dtype = lock - self.lock = dtype + self.dtype = dtype + self.lock = lock def __getitem__( self, key: xarray.core.indexing.ExplicitIndexer From 0f91f05e532f4424f3d6afd6e6d5bd5a02ceed55 Mon Sep 17 00:00:00 2001 From: Stan West <38358698+stanwest@users.noreply.github.com> Date: Tue, 1 Mar 2022 11:00:33 -0500 Subject: [PATCH 58/68] Enable running sphinx-build on Windows (#6237) --- .gitignore | 4 +-- doc/conf.py | 4 +-- doc/getting-started-guide/quick-overview.rst | 4 ++- doc/internals/zarr-encoding-spec.rst | 7 +++++ doc/user-guide/dask.rst | 26 +++++++-------- doc/user-guide/io.rst | 33 ++++++++++++-------- doc/user-guide/weather-climate.rst | 4 ++- doc/whats-new.rst | 3 ++ 8 files changed, 53 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 90f4a10ed5f..686c7efa701 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,9 @@ __pycache__ .hypothesis/ # temp files from docs build +doc/*.nc doc/auto_gallery -doc/example.nc +doc/rasm.zarr doc/savefig # C extensions @@ -72,4 +73,3 @@ xarray/tests/data/*.grib.*.idx Icon* .ipynb_checkpoints -doc/rasm.zarr diff --git a/doc/conf.py b/doc/conf.py index 5c4c0a52d43..8ce9efdce88 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -28,9 +28,9 @@ print("python exec:", sys.executable) print("sys.path:", sys.path) -if "conda" in sys.executable: +if "CONDA_DEFAULT_ENV" in os.environ or "conda" in sys.executable: print("conda environment:") - subprocess.run(["conda", "list"]) + subprocess.run([os.environ.get("CONDA_EXE", "conda"), "list"]) else: print("pip environment:") subprocess.run([sys.executable, "-m", "pip", "list"]) diff --git a/doc/getting-started-guide/quick-overview.rst b/doc/getting-started-guide/quick-overview.rst index cd4b66d2f6f..ee13fea8bf1 100644 --- a/doc/getting-started-guide/quick-overview.rst +++ b/doc/getting-started-guide/quick-overview.rst @@ -215,13 +215,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python ds.to_netcdf("example.nc") - xr.open_dataset("example.nc") + reopened = xr.open_dataset("example.nc") + reopened .. ipython:: python :suppress: import os + reopened.close() os.remove("example.nc") diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index f809ea337d5..7fb2383935f 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -63,3 +63,10 @@ re-open it directly with Zarr: print(os.listdir("rasm.zarr")) print(zgroup.tree()) dict(zgroup["Tair"].attrs) + +.. ipython:: python + :suppress: + + import shutil + + shutil.rmtree("rasm.zarr") diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index 4d8715d9c51..5110a970390 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -55,6 +55,8 @@ argument to :py:func:`~xarray.open_dataset` or using the .. ipython:: python :suppress: + import os + import numpy as np import pandas as pd import xarray as xr @@ -129,6 +131,11 @@ will return a ``dask.delayed`` object that can be computed later. with ProgressBar(): results = delayed_obj.compute() +.. ipython:: python + :suppress: + + os.remove("manipulated-example-data.nc") # Was not opened. + .. 
note:: When using Dask's distributed scheduler to write NETCDF4 files, @@ -147,13 +154,6 @@ A dataset can also be converted to a Dask DataFrame using :py:meth:`~xarray.Data Dask DataFrames do not support multi-indexes so the coordinate variables from the dataset are included as columns in the Dask DataFrame. -.. ipython:: python - :suppress: - - import os - - os.remove("example-data.nc") - os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -210,7 +210,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. ipython:: python - ds = ds.persist() + persisted = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -232,11 +232,6 @@ chunk size depends both on your data and on the operations you want to perform. With xarray, both converting data to a Dask arrays and converting the chunk sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: -.. ipython:: python - :suppress: - - ds = ds.chunk({"time": 10}) - .. ipython:: python rechunked = ds.chunk({"latitude": 100, "longitude": 100}) @@ -508,6 +503,11 @@ Notice that the 0-shaped sizes were not printed to screen. Since ``template`` ha expected = ds + 10 + 10 mapped.identical(expected) +.. ipython:: python + :suppress: + + ds.close() # Closes "example-data.nc". + os.remove("example-data.nc") .. tip:: diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 834e9ad2464..ddde0bf5888 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -11,6 +11,8 @@ format (recommended). .. ipython:: python :suppress: + import os + import numpy as np import pandas as pd import xarray as xr @@ -84,6 +86,13 @@ We can load netCDF files to create a new Dataset using ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk +.. ipython:: python + :suppress: + + # Close "saved_on_disk.nc", but retain the file until after closing or deleting other + # datasets that will refer to it. + ds_disk.close() + Similarly, a DataArray can be saved to disk using the :py:meth:`DataArray.to_netcdf` method, and loaded from disk using the :py:func:`open_dataarray` function. As netCDF files @@ -204,11 +213,6 @@ You can view this encoding information (among others) in the Note that all operations that manipulate variables other than indexing will remove encoding information. -.. ipython:: python - :suppress: - - ds_disk.close() - .. _combining multiple files: @@ -484,13 +488,13 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back - xr.open_dataarray("complex.nc", engine="h5netcdf") + reopened = xr.open_dataarray("complex.nc", engine="h5netcdf") + reopened .. ipython:: python :suppress: - import os - + reopened.close() os.remove("complex.nc") .. warning:: @@ -989,16 +993,19 @@ To export just the dataset schema without the data itself, use the ds.to_dict(data=False) -This can be useful for generating indices of dataset contents to expose to -search indices or other automated data discovery tools. - .. ipython:: python :suppress: - import os - + # We're now done with the dataset named `ds`. Although the `with` statement closed + # the dataset, displaying the unpickled pickle of `ds` re-opened "saved_on_disk.nc". + # However, `ds` (rather than the unpickled dataset) refers to the open file. Delete + # `ds` to close the file. 
+ del ds os.remove("saved_on_disk.nc") +This can be useful for generating indices of dataset contents to expose to +search indices or other automated data discovery tools. + .. _io.rasterio: Rasterio diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index d11c7c3a4f9..3c957978acf 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -218,13 +218,15 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python da.to_netcdf("example-no-leap.nc") - xr.open_dataset("example-no-leap.nc") + reopened = xr.open_dataset("example-no-leap.nc") + reopened .. ipython:: python :suppress: import os + reopened.close() os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4729b74640f..ae51b9f6ce1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,9 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Delete files of datasets saved to disk while building the documentation and enable + building on Windows via `sphinx-build` (:pull:`6237`). + By `Stan West `_. Internal Changes From f9037c41e36254bfe7efa9feaedd3eae5512bd11 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 1 Mar 2022 09:57:43 -0700 Subject: [PATCH 59/68] Disable CI runs on forks (#6315) --- .github/workflows/ci-additional.yaml | 1 + .github/workflows/ci.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index b476c224df6..6bd9bcd9d6b 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -119,6 +119,7 @@ jobs: doctest: name: Doctests runs-on: "ubuntu-latest" + if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4f6cbbc3871..1e5db3a73ed 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -115,6 +115,7 @@ jobs: event_file: name: "Event File" runs-on: ubuntu-latest + if: github.repository == 'pydata/xarray' steps: - name: Upload uses: actions/upload-artifact@v2 From 2ab9f36641a8a0248df0497aadd59246586b65ea Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 2 Mar 2022 06:56:25 -0700 Subject: [PATCH 60/68] Add General issue template (#6314) * Add General issue template * Update description Co-authored-by: Mathias Hauser * Remove title Co-authored-by: Mathias Hauser Co-authored-by: Mathias Hauser --- .github/ISSUE_TEMPLATE/misc.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/misc.yml diff --git a/.github/ISSUE_TEMPLATE/misc.yml b/.github/ISSUE_TEMPLATE/misc.yml new file mode 100644 index 00000000000..94dd2d86567 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/misc.yml @@ -0,0 +1,17 @@ +name: Issue +description: General Issue or discussion topic. For usage questions, please follow the "Usage question" link +labels: ["needs triage"] +body: + - type: markdown + attributes: + value: | + Please describe your issue here. + - type: textarea + id: issue-description + attributes: + label: What is your issue? + description: | + Thank you for filing an issue! Please give us further information on how we can help you. + placeholder: Please describe your issue. 
+ validations: + required: true From cdab326bab0cf86f96bcce4292f4fae24bddc7b6 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 2 Mar 2022 14:57:29 +0100 Subject: [PATCH 61/68] fix typos (using codespell) (#6316) * fix typos (using codespell) * revert 'split' --- doc/examples/ROMS_ocean_model.ipynb | 2 +- doc/gallery/plot_colorbar_center.py | 2 +- doc/internals/how-to-add-new-backend.rst | 4 ++-- doc/internals/zarr-encoding-spec.rst | 2 +- doc/roadmap.rst | 2 +- doc/user-guide/time-series.rst | 2 +- doc/whats-new.rst | 12 ++++++------ xarray/backends/common.py | 2 +- xarray/backends/file_manager.py | 2 +- xarray/backends/pseudonetcdf_.py | 2 +- xarray/backends/zarr.py | 2 +- xarray/convert.py | 2 +- xarray/core/accessor_str.py | 4 ++-- xarray/core/combine.py | 2 +- xarray/core/computation.py | 2 +- xarray/core/concat.py | 2 +- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 12 ++++++------ xarray/core/merge.py | 2 +- xarray/core/missing.py | 8 ++++---- xarray/core/rolling.py | 4 ++-- xarray/core/variable.py | 2 +- xarray/tests/test_backends.py | 6 +++--- xarray/tests/test_calendar_ops.py | 2 +- xarray/tests/test_coarsen.py | 4 ++-- xarray/tests/test_computation.py | 2 +- xarray/tests/test_dask.py | 2 +- xarray/tests/test_dataarray.py | 6 +++--- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_interp.py | 2 +- xarray/tests/test_missing.py | 2 +- xarray/tests/test_plot.py | 6 +++--- 32 files changed, 56 insertions(+), 56 deletions(-) diff --git a/doc/examples/ROMS_ocean_model.ipynb b/doc/examples/ROMS_ocean_model.ipynb index 82d7a8d58af..d5c76380525 100644 --- a/doc/examples/ROMS_ocean_model.ipynb +++ b/doc/examples/ROMS_ocean_model.ipynb @@ -77,7 +77,7 @@ "ds = xr.tutorial.open_dataset(\"ROMS_example.nc\", chunks={\"ocean_time\": 1})\n", "\n", "# This is a way to turn on chunking and lazy evaluation. Opening with mfdataset, or\n", - "# setting the chunking in the open_dataset would also achive this.\n", + "# setting the chunking in the open_dataset would also achieve this.\n", "ds" ] }, diff --git a/doc/gallery/plot_colorbar_center.py b/doc/gallery/plot_colorbar_center.py index 42d6448adf6..da3447a1f25 100644 --- a/doc/gallery/plot_colorbar_center.py +++ b/doc/gallery/plot_colorbar_center.py @@ -38,6 +38,6 @@ ax4.set_title("Celsius: center=False") ax4.set_ylabel("") -# Mke it nice +# Make it nice plt.tight_layout() plt.show() diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index 940fa7a4d1e..17f94189504 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -317,7 +317,7 @@ grouped in three types of indexes :py:class:`~xarray.core.indexing.OuterIndexer` and :py:class:`~xarray.core.indexing.VectorizedIndexer`. This implies that the implementation of the method ``__getitem__`` can be tricky. -In oder to simplify this task, Xarray provides a helper function, +In order to simplify this task, Xarray provides a helper function, :py:func:`~xarray.core.indexing.explicit_indexing_adapter`, that transforms all the input ``indexer`` types (`basic`, `outer`, `vectorized`) in a tuple which is interpreted correctly by your backend. @@ -426,7 +426,7 @@ The ``OUTER_1VECTOR`` indexing shall supports number, slices and at most one list. The behaviour with the list shall be the same of ``OUTER`` indexing. 
If you support more complex indexing as `explicit indexing` or -`numpy indexing`, you can have a look to the implemetation of Zarr backend and Scipy backend, +`numpy indexing`, you can have a look to the implementation of Zarr backend and Scipy backend, currently available in :py:mod:`~xarray.backends` module. .. _RST preferred_chunks: diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 7fb2383935f..f8bffa6e82f 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -14,7 +14,7 @@ for the storage of the NetCDF data model in Zarr; see discussion. First, Xarray can only read and write Zarr groups. There is currently no support -for reading / writting individual Zarr arrays. Zarr groups are mapped to +for reading / writing individual Zarr arrays. Zarr groups are mapped to Xarray ``Dataset`` objects. Second, from Xarray's point of view, the key difference between diff --git a/doc/roadmap.rst b/doc/roadmap.rst index c59d56fdd6d..d4098cfd35a 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -112,7 +112,7 @@ A cleaner model would be to elevate ``indexes`` to an explicit part of xarray's data model, e.g., as attributes on the ``Dataset`` and ``DataArray`` classes. Indexes would need to be propagated along with coordinates in xarray operations, but will no longer would need to have -a one-to-one correspondance with coordinate variables. Instead, an index +a one-to-one correspondence with coordinate variables. Instead, an index should be able to refer to multiple (possibly multidimensional) coordinates that define it. See `GH 1603 `__ for full details diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst index 36a57e37475..f6b9d0bc35b 100644 --- a/doc/user-guide/time-series.rst +++ b/doc/user-guide/time-series.rst @@ -101,7 +101,7 @@ You can also select a particular time by indexing with a ds.sel(time=datetime.time(12)) -For more details, read the pandas documentation and the section on `Indexing Using Datetime Components `_ (i.e. using the ``.dt`` acessor). +For more details, read the pandas documentation and the section on `Indexing Using Datetime Components `_ (i.e. using the ``.dt`` accessor). .. _dt_accessor: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ae51b9f6ce1..6f939a36f25 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -138,7 +138,7 @@ Bug fixes By `Michael Delgado `_. - `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. -- Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain cirumstances (:pull:`5526`). By `Chris Roat `_. +- Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain circumstances (:pull:`5526`). By `Chris Roat `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -706,7 +706,7 @@ Breaking changes By `Alessandro Amici `_. - Functions that are identities for 0d data return the unchanged data if axis is empty. This ensures that Datasets where some variables do - not have the averaged dimensions are not accidentially changed + not have the averaged dimensions are not accidentally changed (:issue:`4885`, :pull:`5207`). By `David Schwörer `_. 
- :py:attr:`DataArray.coarsen` and :py:attr:`Dataset.coarsen` no longer support passing ``keep_attrs`` @@ -1419,7 +1419,7 @@ New Features Enhancements ~~~~~~~~~~~~ - Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` - We performs independant interpolation sequentially rather than interpolating in + We performs independent interpolation sequentially rather than interpolating in one large multidimensional space. (:issue:`2223`) By `Keisuke Fujii `_. - :py:meth:`DataArray.interp` now support interpolations over chunked dimensions (:pull:`4155`). By `Alexandre Poux `_. @@ -2770,7 +2770,7 @@ Breaking changes - ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`. Call :py:meth:`Dataset.transpose` directly instead. - Iterating over a ``Dataset`` now includes only data variables, not coordinates. - Similarily, calling ``len`` and ``bool`` on a ``Dataset`` now + Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now includes only data variables. - ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks array data, not coordinates. @@ -3908,7 +3908,7 @@ Bug fixes (:issue:`1606`). By `Joe Hamman `_. -- Fix bug when using ``pytest`` class decorators to skiping certain unittests. +- Fix bug when using ``pytest`` class decorators to skipping certain unittests. The previous behavior unintentionally causing additional tests to be skipped (:issue:`1531`). By `Joe Hamman `_. @@ -5656,7 +5656,7 @@ Bug fixes - Several bug fixes related to decoding time units from netCDF files (:issue:`316`, :issue:`330`). Thanks Stefan Pfenninger! - xray no longer requires ``decode_coords=False`` when reading datasets with - unparseable coordinate attributes (:issue:`308`). + unparsable coordinate attributes (:issue:`308`). - Fixed ``DataArray.loc`` indexing with ``...`` (:issue:`318`). - Fixed an edge case that resulting in an error when reindexing multi-dimensional variables (:issue:`315`). 
diff --git a/xarray/backends/common.py b/xarray/backends/common.py index f659d71760b..ad92a6c5869 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -160,7 +160,7 @@ def sync(self, compute=True): import dask.array as da # TODO: consider wrapping targets with dask.delayed, if this makes - # for any discernable difference in perforance, e.g., + # for any discernible difference in perforance, e.g., # targets = [dask.delayed(t) for t in self.targets] delayed_store = da.store( diff --git a/xarray/backends/file_manager.py b/xarray/backends/file_manager.py index 47a4201539b..06be03e4e44 100644 --- a/xarray/backends/file_manager.py +++ b/xarray/backends/file_manager.py @@ -204,7 +204,7 @@ def _acquire_with_cache_info(self, needs_lock=True): kwargs["mode"] = self._mode file = self._opener(*self._args, **kwargs) if self._mode == "w": - # ensure file doesn't get overriden when opened again + # ensure file doesn't get overridden when opened again self._mode = "a" self._cache[self._key] = file return file, False diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index da178926dbe..a2ca7f0206c 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -105,7 +105,7 @@ class PseudoNetCDFBackendEntrypoint(BackendEntrypoint): available = has_pseudonetcdf # *args and **kwargs are not allowed in open_backend_dataset_ kwargs, - # unless the open_dataset_parameters are explicity defined like this: + # unless the open_dataset_parameters are explicitly defined like this: open_dataset_parameters = ( "filename_or_obj", "mask_and_scale", diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index b3f62bb798d..97517818d07 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -179,7 +179,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): - # Zarr arrays do not have dimenions. To get around this problem, we add + # Zarr arrays do not have dimensions. To get around this problem, we add # an attribute that specifies the dimension. We have to hide this attribute # when we send the attributes to the user. # zarr_obj can be either a zarr group or zarr array diff --git a/xarray/convert.py b/xarray/convert.py index 0fbd1e13163..93b0a30e57b 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -235,7 +235,7 @@ def _iris_cell_methods_to_str(cell_methods_obj): def _name(iris_obj, default="unknown"): - """Mimicks `iris_obj.name()` but with different name resolution order. + """Mimics `iris_obj.name()` but with different name resolution order. Similar to iris_obj.name() method, but using iris_obj.var_name first to enable roundtripping. diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 9c9de76c0ed..54c9b857a7a 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -456,7 +456,7 @@ def cat( Strings or array-like of strings to concatenate elementwise with the current DataArray. sep : str or array-like of str, default: "". - Seperator to use between strings. + Separator to use between strings. It is broadcast in the same way as the other input strings. If array-like, its dimensions will be placed at the end of the output array dimensions. @@ -539,7 +539,7 @@ def join( Only one dimension is allowed at a time. Optional for 0D or 1D DataArrays, required for multidimensional DataArrays. sep : str or array-like, default: "". - Seperator to use between strings. + Separator to use between strings. 
It is broadcast in the same way as the other input strings. If array-like, its dimensions will be placed at the end of the output array dimensions. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 081b53391ba..d23a58522e6 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -135,7 +135,7 @@ def _infer_concat_order_from_coords(datasets): order = rank.astype(int).values - 1 # Append positions along extra dimension to structure which - # encodes the multi-dimensional concatentation order + # encodes the multi-dimensional concatenation order tile_ids = [ tile_id + (position,) for tile_id, position in zip(tile_ids, order) ] diff --git a/xarray/core/computation.py b/xarray/core/computation.py index c11bd1a78a4..ce37251576a 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1941,7 +1941,7 @@ def unify_chunks(*objects: T_Xarray) -> tuple[T_Xarray, ...]: for obj in objects ] - # Get argumets to pass into dask.array.core.unify_chunks + # Get arguments to pass into dask.array.core.unify_chunks unify_chunks_args = [] sizes: dict[Hashable, int] = {} for ds in datasets: diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 4621e622d42..1e6e246322e 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -455,7 +455,7 @@ def _dataset_concat( if (dim in coord_names or dim in data_names) and dim not in dim_names: datasets = [ds.expand_dims(dim) for ds in datasets] - # determine which variables to concatentate + # determine which variables to concatenate concat_over, equals, concat_dim_lengths = _calc_concat_over( datasets, dim, dim_names, data_vars, coords, compat ) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3d720f7fc8b..3df9f7ca8a4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2584,7 +2584,7 @@ def interpolate_na( ) def ffill(self, dim: Hashable, limit: int = None) -> DataArray: - """Fill NaN values by propogating values forward + """Fill NaN values by propagating values forward *Requires bottleneck.* @@ -2609,7 +2609,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> DataArray: return ffill(self, dim, limit=limit) def bfill(self, dim: Hashable, limit: int = None) -> DataArray: - """Fill NaN values by propogating values backward + """Fill NaN values by propagating values backward *Requires bottleneck.* diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 90684c4db87..a1d7209bc75 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -379,7 +379,7 @@ def _check_chunks_compatibility(var, chunks, preferred_chunks): def _get_chunk(var, chunks): - # chunks need to be explicity computed to take correctly into accout + # chunks need to be explicitly computed to take correctly into account # backend preferred chunking import dask.array as da @@ -1529,7 +1529,7 @@ def __setitem__(self, key: Hashable | list[Hashable] | Mapping, value) -> None: except Exception as e: if processed: raise RuntimeError( - "An error occured while setting values of the" + "An error occurred while setting values of the" f" variable '{name}'. The following variables have" f" been successfully updated:\n{processed}" ) from e @@ -1976,7 +1976,7 @@ def to_zarr( metadata for existing stores (falling back to non-consolidated). append_dim : hashable, optional If set, the dimension along which the data will be appended. All - other dimensions on overriden variables must remain the same size. + other dimensions on overridden variables must remain the same size. 
region : dict, optional Optional mapping from dimension names to integer slices along dataset dimensions to indicate the region of existing zarr array(s) @@ -2001,7 +2001,7 @@ def to_zarr( Set False to override this restriction; however, data may become corrupted if Zarr arrays are written in parallel. This option may be useful in combination with ``compute=False`` to initialize a Zarr from an existing - Dataset with aribtrary chunk structure. + Dataset with arbitrary chunk structure. storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). @@ -4930,7 +4930,7 @@ def interpolate_na( return new def ffill(self, dim: Hashable, limit: int = None) -> Dataset: - """Fill NaN values by propogating values forward + """Fill NaN values by propagating values forward *Requires bottleneck.* @@ -4956,7 +4956,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> Dataset: return new def bfill(self, dim: Hashable, limit: int = None) -> Dataset: - """Fill NaN values by propogating values backward + """Fill NaN values by propagating values backward *Requires bottleneck.* diff --git a/xarray/core/merge.py b/xarray/core/merge.py index d5307678f89..e5407ae79c3 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -587,7 +587,7 @@ def merge_core( Parameters ---------- objects : list of mapping - All values must be convertable to labeled arrays. + All values must be convertible to labeled arrays. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional Compatibility checks to use when merging variables. join : {"outer", "inner", "left", "right"}, optional diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 39e7730dd58..c1776145e21 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -573,7 +573,7 @@ def _localize(var, indexes_coords): def _floatize_x(x, new_x): """Make x and new_x float. - This is particulary useful for datetime dtype. + This is particularly useful for datetime dtype. x, new_x: tuple of np.ndarray """ x = list(x) @@ -624,7 +624,7 @@ def interp(var, indexes_coords, method, **kwargs): kwargs["bounds_error"] = kwargs.get("bounds_error", False) result = var - # decompose the interpolation into a succession of independant interpolation + # decompose the interpolation into a succession of independent interpolation for indexes_coords in decompose_interp(indexes_coords): var = result @@ -731,7 +731,7 @@ def interp_func(var, x, new_x, method, kwargs): for i in range(new_x[0].ndim) } - # if usefull, re-use localize for each chunk of new_x + # if useful, re-use localize for each chunk of new_x localize = (method in ["linear", "nearest"]) and (new_x[0].chunks is not None) # scipy.interpolate.interp1d always forces to float. @@ -825,7 +825,7 @@ def _dask_aware_interpnd(var, *coords, interp_func, interp_kwargs, localize=True def decompose_interp(indexes_coords): - """Decompose the interpolation into a succession of independant interpolation keeping the order""" + """Decompose the interpolation into a succession of independent interpolation keeping the order""" dest_dims = [ dest[1].dims if dest[1].ndim > 0 else [dim] diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 0bc07c1aaeb..f2ac9d979ae 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -33,7 +33,7 @@ Returns ------- reduced : same type as caller - New object with `{name}` applied along its rolling dimnension. + New object with `{name}` applied along its rolling dimension. 
""" @@ -767,7 +767,7 @@ def __init__(self, obj, windows, boundary, side, coord_func): exponential window along (e.g. `time`) to the size of the moving window. boundary : 'exact' | 'trim' | 'pad' If 'exact', a ValueError will be raised if dimension size is not a - multiple of window size. If 'trim', the excess indexes are trimed. + multiple of window size. If 'trim', the excess indexes are trimmed. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' coord_func : mapping from coordinate name to func. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 6db795ce26f..74f394b68ca 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -700,7 +700,7 @@ def _broadcast_indexes_outer(self, key): return dims, OuterIndexer(tuple(new_key)), None def _nonzero(self): - """Equivalent numpy's nonzero but returns a tuple of Varibles.""" + """Equivalent numpy's nonzero but returns a tuple of Variables.""" # TODO we should replace dask's native nonzero # after https://github.com/dask/dask/issues/1076 is implemented. nonzeros = np.nonzero(self.data) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1d0342dd344..e0bc0b10437 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1937,7 +1937,7 @@ def test_chunk_encoding_with_dask(self): with self.roundtrip(ds_chunk4) as actual: assert (4,) == actual["var1"].encoding["chunks"] - # TODO: remove this failure once syncronized overlapping writes are + # TODO: remove this failure once synchronized overlapping writes are # supported by xarray ds_chunk4["var1"].encoding.update({"chunks": 5}) with pytest.raises(NotImplementedError, match=r"named 'var1' would overlap"): @@ -2255,7 +2255,7 @@ def test_write_region_mode(self, mode): @requires_dask def test_write_preexisting_override_metadata(self): - """Metadata should be overriden if mode="a" but not in mode="r+".""" + """Metadata should be overridden if mode="a" but not in mode="r+".""" original = Dataset( {"u": (("x",), np.zeros(10), {"variable": "original"})}, attrs={"global": "original"}, @@ -2967,7 +2967,7 @@ def test_open_fileobj(self): with pytest.raises(TypeError, match="not a valid NetCDF 3"): open_dataset(f, engine="scipy") - # TOOD: this additional open is required since scipy seems to close the file + # TODO: this additional open is required since scipy seems to close the file # when it fails on the TypeError (though didn't when we used # `raises_regex`?). 
Ref https://github.com/pydata/xarray/pull/5191 with open(tmp_file, "rb") as f: diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 8d1ddcf4689..0f0948aafc5 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -161,7 +161,7 @@ def test_convert_calendar_errors(): with pytest.raises(ValueError, match="Argument `align_on` must be specified"): convert_calendar(src_nl, "360_day") - # Standard doesn't suuport year 0 + # Standard doesn't support year 0 with pytest.raises( ValueError, match="Source time coordinate contains dates with year 0" ): diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index 9b1919b136c..7d6613421d5 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -158,7 +158,7 @@ def test_coarsen_keep_attrs(funcname, argument) -> None: @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median")) def test_coarsen_reduce(ds, window, name) -> None: - # Use boundary="trim" to accomodate all window sizes used in tests + # Use boundary="trim" to accommodate all window sizes used in tests coarsen_obj = ds.coarsen(time=window, boundary="trim") # add nan prefix to numpy methods to get similar behavior as bottleneck @@ -241,7 +241,7 @@ def test_coarsen_da_reduce(da, window, name) -> None: if da.isnull().sum() > 1 and window == 1: pytest.skip("These parameters lead to all-NaN slices") - # Use boundary="trim" to accomodate all window sizes used in tests + # Use boundary="trim" to accommodate all window sizes used in tests coarsen_obj = da.coarsen(time=window, boundary="trim") # add nan prefix to numpy methods to get similar # behavior as bottleneck diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 7f601c6195a..dac3c17b1f1 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2038,7 +2038,7 @@ def test_polyval(use_dask, use_datetime) -> None: "cartesian", -1, ], - [ # Test filling inbetween with coords: + [ # Test filling in between with coords: xr.DataArray( [1, 2], dims=["cartesian"], diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 48432f319b2..42d8df57cb7 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -460,7 +460,7 @@ def test_concat_loads_variables(self): assert isinstance(out["c"].data, dask.array.Array) out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=[], coords=[]) - # variables are loaded once as we are validing that they're identical + # variables are loaded once as we are validating that they're identical assert kernel_call_count == 12 assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fc82c03c5d9..3939da08a67 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -174,7 +174,7 @@ def test_get_index_size_zero(self): def test_struct_array_dims(self): """ - This test checks subraction of two DataArrays for the case + This test checks subtraction of two DataArrays for the case when dimension is a structured array. 
""" # GH837, GH861 @@ -197,7 +197,7 @@ def test_struct_array_dims(self): assert_identical(actual, expected) - # checking array subraction when dims are not the same + # checking array subtraction when dims are not the same p_data_alt = np.array( [("Abe", 180), ("Stacy", 151), ("Dick", 200)], dtype=[("name", "|S256"), ("height", object)], @@ -213,7 +213,7 @@ def test_struct_array_dims(self): assert_identical(actual, expected) - # checking array subraction when dims are not the same and one + # checking array subtraction when dims are not the same and one # is np.nan p_data_nan = np.array( [("Abe", 180), ("Stacy", np.nan), ("Dick", 200)], diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 7ff75fb791b..d726920acce 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -587,7 +587,7 @@ def test_get_index(self): def test_attr_access(self): ds = Dataset( - {"tmin": ("x", [42], {"units": "Celcius"})}, attrs={"title": "My test data"} + {"tmin": ("x", [42], {"units": "Celsius"})}, attrs={"title": "My test data"} ) assert_identical(ds.tmin, ds["tmin"]) assert_identical(ds.tmin.x, ds.x) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index f6d8a7cfcb0..2a6de0be550 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -770,7 +770,7 @@ def test_decompose(method): ], ) def test_interpolate_chunk_1d(method, data_ndim, interp_ndim, nscalar, chunked): - """Interpolate nd array with multiple independant indexers + """Interpolate nd array with multiple independent indexers It should do a series of 1d interpolation """ diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 2bf5af31fa5..3721c92317d 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -116,7 +116,7 @@ def test_interpolate_pd_compat(): @pytest.mark.parametrize("method", ["barycentric", "krog", "pchip", "spline", "akima"]) def test_scipy_methods_function(method): # Note: Pandas does some wacky things with these methods and the full - # integration tests wont work. + # integration tests won't work. 
da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) actual = da.interpolate_na(method=method, dim="time") assert (da.count("time") <= actual.count("time")).all() diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index c0b6d355441..8ded4c6515f 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -526,7 +526,7 @@ def test__infer_interval_breaks_logscale_invalid_coords(self): x = np.linspace(0, 5, 6) with pytest.raises(ValueError): _infer_interval_breaks(x, scale="log") - # Check if error is raised after nagative values in the array + # Check if error is raised after negative values in the array x = np.linspace(-5, 5, 11) with pytest.raises(ValueError): _infer_interval_breaks(x, scale="log") @@ -1506,7 +1506,7 @@ def test_convenient_facetgrid(self): else: assert "" == ax.get_xlabel() - # Infering labels + # Inferring labels g = self.plotfunc(d, col="z", col_wrap=2) assert_array_equal(g.axes.shape, [2, 2]) for (y, x), ax in np.ndenumerate(g.axes): @@ -1986,7 +1986,7 @@ def test_convenient_facetgrid(self): assert "y" == ax.get_ylabel() assert "x" == ax.get_xlabel() - # Infering labels + # Inferring labels g = self.plotfunc(d, col="z", col_wrap=2) assert_array_equal(g.axes.shape, [2, 2]) for (y, x), ax in np.ndenumerate(g.axes): From 9b4d0b29c319f4b68f89328b1bf558711f339504 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Wed, 2 Mar 2022 10:49:23 -0500 Subject: [PATCH 62/68] v2022.03.0 release notes (#6319) * release summary * update first calver relase number --- HOW_TO_RELEASE.md | 2 +- doc/whats-new.rst | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 893a6d77168..8d82277ae55 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -109,6 +109,6 @@ upstream https://github.com/pydata/xarray (push) ## Note on version numbering -As of 2022.02.0, we utilize the [CALVER](https://calver.org/) version system. +As of 2022.03.0, we utilize the [CALVER](https://calver.org/) version system. Specifically, we have adopted the pattern `YYYY.MM.X`, where `YYYY` is a 4-digit year (e.g. `2022`), `MM` is a 2-digit zero-padded month (e.g. `01` for January), and `X` is the release number (starting at zero at the start of each month and incremented once for each additional release). diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6f939a36f25..25e7071f9d6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,10 +14,18 @@ What's New np.random.seed(123456) -.. _whats-new.2022.02.0: +.. _whats-new.2022.03.0: -v2022.02.0 (unreleased) ------------------------ +v2022.03.0 (2 March 2022) +------------------------- + +This release brings a number of small improvements, as well as a move to `calendar versioning `_ (:issue:`6176`). + +Many thanks to the 16 contributors to the v2022.02.0 release! + +Aaron Spring, Alan D. Snow, Anderson Banihirwe, crusaderky, Illviljan, Joe Hamman, Jonas Gliß, +Lukas Pilz, Martin Bergemann, Mathias Hauser, Maximilian Roos, Romain Caneill, Stan West, Stijn Van Hoey, +Tobias Kölling, and Tom Nicholas. New Features @@ -27,7 +35,6 @@ New Features :py:meth:`CFTimeIndex.shift` if ``shift_freq`` is between ``Day`` and ``Microsecond``. (:issue:`6134`, :pull:`6135`). By `Aaron Spring `_. - - Enbable to provide more keyword arguments to `pydap` backend when reading OpenDAP datasets (:issue:`6274`). By `Jonas Gliß `. 
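For readers unfamiliar with calendar versioning, the ``YYYY.MM.X`` pattern adopted in the HOW_TO_RELEASE.md hunk above can be sketched as a small check. This is an illustration only: the ``is_calver`` helper and the use of Python's standard ``re`` module are assumptions made here for clarity, not part of xarray's release tooling or of any patch in this series::

    import re

    # ``YYYY.MM.X``: 4-digit year, 2-digit zero-padded month, integer release number.
    _CALVER = re.compile(r"^(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<release>\d+)$")


    def is_calver(version: str) -> bool:
        """Return True if ``version`` matches the ``YYYY.MM.X`` calendar-version pattern."""
        m = _CALVER.match(version)
        if m is None:
            return False
        # The month must be a real, zero-padded month (e.g. "01" for January).
        return 1 <= int(m.group("month")) <= 12


    assert is_calver("2022.03.0")   # the release described in the patch above
    assert not is_calver("0.21.1")  # an older-style version string does not match
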
From 56c7b8a0782d132e8c4d5913fb6a40ecb0ee60e8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 2 Mar 2022 11:20:29 -0500 Subject: [PATCH 63/68] New whatsnew section --- doc/whats-new.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6f939a36f25..f12d6525133 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,35 @@ What's New np.random.seed(123456) +.. _whats-new.2022.03.1: + +v2022.03.1 (unreleased) +--------------------- + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + .. _whats-new.2022.02.0: v2022.02.0 (unreleased) From a01460bd90e3ae31a32e40005f33c2919efba8bb Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 3 Mar 2022 10:43:35 +0100 Subject: [PATCH 64/68] quantile: use skipna=None (#6303) * quantile: use skipna=None * better term * move whats new entry * remove duplicated entry --- doc/whats-new.rst | 4 +++- xarray/core/dataarray.py | 7 +++++-- xarray/core/dataset.py | 7 +++++-- xarray/core/groupby.py | 7 +++++-- xarray/core/variable.py | 12 ++++++++++-- xarray/tests/test_dataarray.py | 12 ++++++++---- xarray/tests/test_dataset.py | 3 ++- xarray/tests/test_groupby.py | 25 +++++++++++++++++++++++++ xarray/tests/test_variable.py | 12 ++++++++---- 9 files changed, 71 insertions(+), 18 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fdedc18300f..bfd5f27cbec 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,9 @@ Deprecations Bug fixes ~~~~~~~~~ +- Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and + ensure it skips missing values for float dtypes (consistent with other methods). This should + not change the behavior (:pull:`6303`). By `Mathias Hauser `_. Documentation ~~~~~~~~~~~~~ @@ -86,7 +89,6 @@ Deprecations Bug fixes ~~~~~~~~~ - - Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size can now be stored using `to_zarr()` (:pull:`6258`) By `Tobias Kölling `_. - Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann `_ and `Stan West `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3df9f7ca8a4..e04e5cb9c51 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3440,7 +3440,7 @@ def quantile( dim: str | Sequence[Hashable] | None = None, method: QUANTILE_METHODS = "linear", keep_attrs: bool = None, - skipna: bool = True, + skipna: bool = None, interpolation: QUANTILE_METHODS = None, ) -> DataArray: """Compute the qth quantile of the data along the specified dimension. @@ -3486,7 +3486,10 @@ def quantile( the original object to the new one. If False (default), the new object will be returned without attributes. skipna : bool, optional - Whether to skip missing values when aggregating. + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). 
Returns ------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a1d7209bc75..b3112bdc7ab 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6160,7 +6160,7 @@ def quantile( method: QUANTILE_METHODS = "linear", numeric_only: bool = False, keep_attrs: bool = None, - skipna: bool = True, + skipna: bool = None, interpolation: QUANTILE_METHODS = None, ): """Compute the qth quantile of the data along the specified dimension. @@ -6209,7 +6209,10 @@ def quantile( numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. skipna : bool, optional - Whether to skip missing values when aggregating. + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). Returns ------- diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index d7aa6749592..d3ec824159c 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -554,7 +554,7 @@ def quantile( dim=None, method="linear", keep_attrs=None, - skipna=True, + skipna=None, interpolation=None, ): """Compute the qth quantile over each array in the groups and @@ -597,7 +597,10 @@ def quantile( version 1.22.0. skipna : bool, optional - Whether to skip missing values when aggregating. + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). Returns ------- diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 74f394b68ca..c8d46d20d46 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1978,7 +1978,7 @@ def quantile( dim: str | Sequence[Hashable] | None = None, method: QUANTILE_METHODS = "linear", keep_attrs: bool = None, - skipna: bool = True, + skipna: bool = None, interpolation: QUANTILE_METHODS = None, ) -> Variable: """Compute the qth quantile of the data along the specified dimension. @@ -2024,6 +2024,11 @@ def quantile( If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). 
Returns ------- @@ -2059,7 +2064,10 @@ def quantile( method = interpolation - _quantile_func = np.nanquantile if skipna else np.quantile + if skipna or (skipna is None and self.dtype.kind in "cfO"): + _quantile_func = np.nanquantile + else: + _quantile_func = np.quantile if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 3939da08a67..55c68b7ff6b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2516,15 +2516,19 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) - @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) def test_quantile(self, q, axis, dim, skipna) -> None: - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) - _percentile_func = np.nanpercentile if skipna else np.percentile - expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) + + va = self.va.copy(deep=True) + va[0, 0] = np.NaN + + actual = DataArray(va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile + expected = _percentile_func(va.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d726920acce..8d6c4f96857 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4718,10 +4718,11 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) - @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) def test_quantile(self, q, skipna) -> None: ds = create_test_data(seed=123) + ds.var1.data[0, 0] = np.NaN for dim in [None, "dim1", ["dim1"]]: ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 1ec2a53c131..4b6da82bdc7 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -203,6 +203,17 @@ def test_da_groupby_quantile() -> None: actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) + array = xr.DataArray( + data=[np.NaN, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) + + for skipna in (True, False, None): + e = [np.NaN, 5] if skipna is False else [2.5, 5] + + expected = xr.DataArray(data=e, coords={"x": [1, 2], "quantile": 0.5}, dims="x") + actual = array.groupby("x").quantile(0.5, skipna=skipna) + assert_identical(expected, actual) + # Multiple dimensions array = xr.DataArray( data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], @@ -306,6 +317,20 @@ def test_ds_groupby_quantile() -> None: actual = ds.groupby("x").quantile([0, 1]) assert_identical(expected, actual) + ds = xr.Dataset( + data_vars={"a": ("x", [np.NaN, 2, 3, 4, 5, 6])}, + coords={"x": [1, 1, 1, 2, 2, 2]}, + ) + + for skipna in (True, False, None): + e = [np.NaN, 5] if skipna is False else [2.5, 5] + + expected = xr.Dataset( + data_vars={"a": ("x", e)}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5, skipna=skipna) + 
assert_identical(expected, actual) + # Multiple dimensions ds = xr.Dataset( data_vars={ diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index a88d5a22c0d..b8e2f6f4582 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1700,16 +1700,20 @@ def raise_if_called(*args, **kwargs): with set_options(use_bottleneck=False): v.min() - @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) def test_quantile(self, q, axis, dim, skipna): - v = Variable(["x", "y"], self.d) + + d = self.d.copy() + d[0, 0] = np.NaN + + v = Variable(["x", "y"], d) actual = v.quantile(q, dim=dim, skipna=skipna) - _percentile_func = np.nanpercentile if skipna else np.percentile - expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) + _percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile + expected = _percentile_func(d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) @requires_dask From f42ac28629b7b2047f859f291e1d755c36f2e834 Mon Sep 17 00:00:00 2001 From: Stan West <38358698+stanwest@users.noreply.github.com> Date: Thu, 3 Mar 2022 12:01:14 -0500 Subject: [PATCH 65/68] Lengthen underline, correct spelling, and reword (#6326) --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bfd5f27cbec..b22c6e4d858 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -17,7 +17,7 @@ What's New .. _whats-new.2022.03.1: v2022.03.1 (unreleased) ---------------------- +----------------------- New Features ~~~~~~~~~~~~ @@ -68,7 +68,7 @@ New Features :py:meth:`CFTimeIndex.shift` if ``shift_freq`` is between ``Day`` and ``Microsecond``. (:issue:`6134`, :pull:`6135`). By `Aaron Spring `_. -- Enbable to provide more keyword arguments to `pydap` backend when reading +- Enable providing more keyword arguments to the `pydap` backend when reading OpenDAP datasets (:issue:`6274`). By `Jonas Gliß `. - Allow :py:meth:`DataArray.drop_duplicates` to drop duplicates along multiple dimensions at once, From 29a87cc110f1a1ff7b21c308ba7277963b51ada3 Mon Sep 17 00:00:00 2001 From: Stan West <38358698+stanwest@users.noreply.github.com> Date: Mon, 7 Mar 2022 08:13:50 -0500 Subject: [PATCH 66/68] In documentation on adding a new backend, add missing import and tweak headings (#6330) --- doc/internals/how-to-add-new-backend.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index 17f94189504..506a8eb21be 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -172,6 +172,7 @@ Xarray :py:meth:`~xarray.open_dataset`, and returns a boolean. Decoders ^^^^^^^^ + The decoders implement specific operations to transform data from on-disk representation to Xarray representation. @@ -199,6 +200,11 @@ performs the inverse transformation. In the following an example on how to use the coders ``decode`` method: +.. ipython:: python + :suppress: + + import xarray as xr + .. ipython:: python var = xr.Variable( @@ -239,7 +245,7 @@ interface only the boolean keywords related to the supported decoders. .. 
_RST backend_registration: How to register a backend -+++++++++++++++++++++++++++ ++++++++++++++++++++++++++ Define a new entrypoint in your ``setup.py`` (or ``setup.cfg``) with: @@ -280,8 +286,9 @@ See https://python-poetry.org/docs/pyproject/#plugins for more information on Po .. _RST lazy_loading: -How to support Lazy Loading +How to support lazy loading +++++++++++++++++++++++++++ + If you want to make your backend effective with big datasets, then you should support lazy loading. Basically, you shall replace the :py:class:`numpy.ndarray` inside the @@ -380,8 +387,9 @@ opening files, we therefore suggest to use the helper class provided by Xarray .. _RST indexing: -Indexing Examples +Indexing examples ^^^^^^^^^^^^^^^^^ + **BASIC** In the ``BASIC`` indexing support, numbers and slices are supported. From 265ec7b4b8f6ee46120f125875685569e4115634 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Mar 2022 07:28:15 -0700 Subject: [PATCH 67/68] Bump actions/checkout from 2 to 3 (#6337) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmarks.yml | 2 +- .github/workflows/ci-additional.yaml | 8 ++++---- .github/workflows/ci.yaml | 4 ++-- .github/workflows/pypi-release.yaml | 2 +- .github/workflows/upstream-dev-ci.yaml | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index de506546ac9..6d482445f96 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -16,7 +16,7 @@ jobs: steps: # We need the full repo to avoid this issue # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 6bd9bcd9d6b..50c95cdebb7 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -22,7 +22,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.1 @@ -53,7 +53,7 @@ jobs: "py39-flaky", ] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. @@ -125,7 +125,7 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. - uses: conda-incubator/setup-miniconda@v2 @@ -162,7 +162,7 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. - uses: conda-incubator/setup-miniconda@v2 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1e5db3a73ed..4747b5ae20d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,7 +22,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.1 @@ -44,7 +44,7 @@ jobs: # Bookend python versions python-version: ["3.8", "3.9", "3.10"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. 
- name: Set environment variables diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index f09291b9c6e..19ba1f0aa22 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'pydata/xarray' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 - uses: actions/setup-python@v2 diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index f6f97fd67e3..6860f99c4da 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -24,7 +24,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.1 @@ -52,7 +52,7 @@ jobs: outputs: artifacts_availability: ${{ steps.status.outputs.ARTIFACTS_AVAILABLE }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. - uses: conda-incubator/setup-miniconda@v2 @@ -110,7 +110,7 @@ jobs: run: shell: bash steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v2 with: python-version: "3.x" From d293f50f9590251ce09543319d1f0dc760466f1b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Mar 2022 07:58:07 -0700 Subject: [PATCH 68/68] Bump actions/setup-python from 2 to 3 (#6338) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- .github/workflows/upstream-dev-ci.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 19ba1f0aa22..c88cf556a50 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v3 name: Install Python with: python-version: 3.8 @@ -50,7 +50,7 @@ jobs: needs: build-artifacts runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v3 name: Install Python with: python-version: 3.8 diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 6860f99c4da..6091306ed8b 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -111,7 +111,7 @@ jobs: shell: bash steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v3 with: python-version: "3.x" - uses: actions/download-artifact@v2