Merge branch 'master' into apply-to-dataset
dcherian authored Jun 21, 2021
2 parents f2d2880 + 6a101a9 commit b59dd1e
Showing 50 changed files with 1,729 additions and 1,112 deletions.
40 changes: 0 additions & 40 deletions .github/workflows/ci-additional.yaml
@@ -146,46 +146,6 @@ jobs:
        run: |
          python -m pytest --doctest-modules xarray --ignore xarray/tests

  typing:
    name: Type checking (mypy)
    runs-on: "ubuntu-latest"
    needs: detect-ci-trigger
    if: needs.detect-ci-trigger.outputs.triggered == 'false'
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0 # Fetch all history for all branches and tags.
      - uses: conda-incubator/setup-miniconda@v2
        with:
          channels: conda-forge
          channel-priority: strict
          mamba-version: "*"
          activate-environment: xarray-tests
          auto-update-conda: false
          python-version: "3.8"

      - name: Install conda dependencies
        run: |
          mamba env update -f ci/requirements/environment.yml
      - name: Install mypy
        run: |
          mamba install --file ci/requirements/mypy_only
      - name: Install xarray
        run: |
          python -m pip install --no-deps -e .
      - name: Version info
        run: |
          conda info -a
          conda list
          python xarray/util/print_versions.py
      - name: Run mypy
        run: |
          python -m mypy .

  min-version-policy:
    name: Minimum Version Policy
    runs-on: "ubuntu-latest"
38 changes: 31 additions & 7 deletions .github/workflows/ci.yaml
@@ -37,7 +37,8 @@ jobs:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest", "macos-latest", "windows-latest"]
        python-version: ["3.7", "3.8", "3.9"]
        # Bookend python versions
        python-version: ["3.7", "3.9"]
    steps:
      - uses: actions/checkout@v2
        with:
@@ -57,8 +58,7 @@
        uses: actions/cache@v2
        with:
          path: ~/conda_pkgs_dir
          key:
            ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{
          key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{
            hashFiles('ci/requirements/**.yml') }}
      - uses: conda-incubator/setup-miniconda@v2
        with:
@@ -87,10 +87,17 @@ jobs:
        run: |
          python -c "import xarray"
      - name: Run tests
        run: |
          python -m pytest -n 4 \
            --cov=xarray \
            --cov-report=xml
        run: python -m pytest -n 4
          --cov=xarray
          --cov-report=xml
          --junitxml=test-results/${{ runner.os }}-${{ matrix.python-version }}.xml

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v2
        with:
          name: Test results for ${{ runner.os }}-${{ matrix.python-version }}
          path: test-results/${{ runner.os }}-${{ matrix.python-version }}.xml

      - name: Upload code coverage to Codecov
        uses: codecov/codecov-action@v1
@@ -100,3 +107,20 @@ jobs:
          env_vars: RUNNER_OS,PYTHON_VERSION
          name: codecov-umbrella
          fail_ci_if_error: false

  publish-test-results:
    needs: test
    runs-on: ubuntu-latest
    # the build-and-test job might be skipped; we don't need to run this job then
    if: success() || failure()

    steps:
      - name: Download Artifacts
        uses: actions/download-artifact@v2
        with:
          path: test-results

      - name: Publish Unit Test Results
        uses: EnricoMi/publish-unit-test-result-action@v1
        with:
          files: test-results/*.xml
13 changes: 11 additions & 2 deletions .pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
      - id: isort
  # https://github.com/python/black#version-control-integration
  - repo: https://github.com/psf/black
    rev: 21.5b1
    rev: 21.6b0
    hooks:
      - id: black
  - repo: https://github.com/keewis/blackdoc
@@ -31,11 +31,20 @@ repos:
        # args: ["--write", "--compact"]
  - repo: https://github.com/pre-commit/mirrors-mypy
    # version must correspond to the one in .github/workflows/ci-additional.yaml
    rev: v0.812
    rev: v0.902
    hooks:
      - id: mypy
        # Copied from setup.cfg
        exclude: "properties|asv_bench"
        additional_dependencies: [
            # Type stubs
            types-python-dateutil,
            types-pkg_resources,
            types-PyYAML,
            types-pytz,
            # Dependencies that are typed
            numpy,
          ]
  # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
  # - repo: https://github.com/asottile/pyupgrade
  #   rev: v1.22.1
4 changes: 0 additions & 4 deletions ci/requirements/mypy_only

This file was deleted.

Binary file modified doc/_static/dataset-diagram-logo.png
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -37,6 +37,7 @@ Top-level functions
   map_blocks
   show_versions
   set_options
   unify_chunks

Dataset
=======
@@ -900,6 +901,7 @@ Advanced API
   Variable
   IndexVariable
   as_variable
   Context
   register_dataset_accessor
   register_dataarray_accessor
   Dataset.set_close
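The newly listed :py:func:`unify_chunks` rechunks dask-backed objects so their chunk boundaries agree. A minimal sketch of how it might be used, assuming dask is installed (the arrays and chunk sizes here are illustrative, not from this commit)::

    import numpy as np
    import xarray as xr

    # Two arrays over the same dimension, chunked differently
    a = xr.DataArray(np.arange(10), dims="x").chunk({"x": 5})
    b = xr.DataArray(np.arange(10), dims="x").chunk({"x": 2})

    # Rechunk both so their chunk boundaries line up
    a2, b2 = xr.unify_chunks(a, b)
    print(a2.chunks, b2.chunks)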
1 change: 1 addition & 0 deletions doc/getting-started-guide/installing.rst
@@ -8,6 +8,7 @@ Required dependencies

- Python (3.7 or later)
- setuptools (40.4 or later)
- typing-extensions
- `numpy <http://www.numpy.org/>`__ (1.17 or later)
- `pandas <http://pandas.pydata.org/>`__ (1.0 or later)

11 changes: 11 additions & 0 deletions doc/internals/zarr-encoding-spec.rst
@@ -38,17 +38,28 @@ After decoding the ``_ARRAY_DIMENSIONS`` attribute and assigning the variable
dimensions, Xarray proceeds to [optionally] decode each variable using its
standard CF decoding machinery used for NetCDF data (see :py:func:`decode_cf`).
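For instance, one could open the store without CF decoding and apply it as a separate step — a hedged sketch using :py:func:`decode_cf` (``rasm.zarr`` refers to the store created in the example below)::

    import xarray as xr

    # Open the raw variables, then apply CF decoding (units, calendars,
    # scale/offset) separately
    ds_raw = xr.open_zarr("rasm.zarr", decode_cf=False)
    ds = xr.decode_cf(ds_raw)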

Finally, it's worth noting that Xarray writes (and attempts to read)
"consolidated metadata" by default (the ``.zmetadata`` file), which is another
non-standard Zarr extension, albeit one implemented upstream in Zarr-Python.
You do not need to write consolidated metadata to make Zarr stores readable in
Xarray, but because Xarray can open these stores much faster, users will see a
warning about poor performance when reading non-consolidated stores unless they
explicitly set ``consolidated=False``. See :ref:`io.zarr.consolidated_metadata`
for more details.

As a concrete example, here we write a tutorial dataset to Zarr and then
re-open it directly with Zarr:

.. ipython:: python

    import os
    import xarray as xr
    import zarr

    ds = xr.tutorial.load_dataset("rasm")
    ds.to_zarr("rasm.zarr", mode="w")
    zgroup = zarr.open("rasm.zarr")
    print(os.listdir("rasm.zarr"))
    print(zgroup.tree())
    dict(zgroup["Tair"].attrs)
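Because the store above was written with consolidated metadata (Xarray's default), it can equally be opened through Zarr's consolidated reader — a sketch, assuming zarr 2.3 or later::

    # Reads all metadata from the single .zmetadata key in one go
    zgroup = zarr.open_consolidated("rasm.zarr")
    print(zgroup.tree())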
7 changes: 6 additions & 1 deletion doc/tutorials-and-videos.rst
@@ -19,7 +19,13 @@ Videos
:card: text-center

---
Xdev Python Tutorial Seminar Series 2021 seminar introducing Xarray (1 of 2) | Anderson Banihirwe
^^^
    .. raw:: html

        <iframe width="100%" src="https://www.youtube.com/embed/Ss4ryKukhi4" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

---
Xarray's virtual tutorial | October 2020 | Anderson Banihirwe, Deepak Cherian, and Martin Durant
^^^
.. raw:: html
@@ -49,7 +55,6 @@ Videos

        <iframe width="100%" src="https://www.youtube.com/embed/J9ypQOnt5l8" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>


Books, Chapters and Articles
-----------------------------

31 changes: 28 additions & 3 deletions doc/user-guide/indexing.rst
@@ -234,9 +234,6 @@ arrays). However, you can do normal indexing with dimension names:
    ds[dict(space=[0], time=[0])]
    ds.loc[dict(time="2000-01-01")]
Using indexing to *assign* values to a subset of dataset (e.g.,
``ds[dict(space=0)] = 1``) is not yet supported.

Dropping labels and dimensions
------------------------------

@@ -536,6 +533,34 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t
    da.isel(x=[0, 1, 2])[1] = -1
    da
You can also assign values to all variables of a :py:class:`Dataset` at once:

.. ipython:: python

    ds_org = xr.tutorial.open_dataset("eraint_uvz").isel(
        latitude=slice(56, 59), longitude=slice(255, 258), level=0
    )

    # set all values to 0
    ds = xr.zeros_like(ds_org)
    ds

    # by integer
    ds[dict(latitude=2, longitude=2)] = 1
    ds["u"]
    ds["v"]

    # by label
    ds.loc[dict(latitude=47.25, longitude=[11.25, 12])] = 100
    ds["u"]

    # dataset as new values
    new_dat = ds_org.loc[dict(latitude=48, longitude=[11.25, 12])]
    new_dat

    ds.loc[dict(latitude=47.25, longitude=[11.25, 12])] = new_dat
    ds["u"]
The dimensions can differ between the variables in the dataset, but every variable must have at least the dimensions specified in the indexer dictionary.
The new values must be a scalar, a :py:class:`DataArray`, or a :py:class:`Dataset` that itself contains all variables of the dataset being modified.
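As a hedged illustration of the :py:class:`DataArray` case (continuing with ``ds`` from above; the positions and values are arbitrary), the array is broadcast across every variable being assigned::

    # A DataArray over the indexed dimension is broadcast to all variables
    ds[dict(latitude=1, longitude=[0, 1])] = xr.DataArray([200, 300], dims="longitude")
    ds["u"]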

.. _more_advanced_indexing:

40 changes: 21 additions & 19 deletions doc/user-guide/io.rst
@@ -954,34 +954,36 @@ For example:
Not all native zarr compression and filtering options have been tested with
xarray.

.. _io.zarr.consolidated_metadata:

Consolidated Metadata
~~~~~~~~~~~~~~~~~~~~~

Xarray needs to read all of the zarr metadata when it opens a dataset.
In some storage media, such as cloud object storage (e.g. Amazon S3),
this can introduce significant overhead, because two separate HTTP calls to the
object store must be made for each variable in the dataset.
With version 2.3, zarr will support a feature called *consolidated metadata*,
which allows all metadata for the entire dataset to be stored with a single
key (by default called ``.zmetadata``). This can drastically speed up
opening the store. (For more information on this feature, consult the
As of Xarray version 0.18, Xarray by default uses a feature called
*consolidated metadata*, storing all metadata for the entire dataset with a
single key (by default called ``.zmetadata``). This typically drastically speeds
up opening the store. (For more information on this feature, consult the
`zarr docs <https://zarr.readthedocs.io/en/latest/tutorial.html#consolidating-metadata>`_.)
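As a hedged illustration of what that single key holds (zarr-python 2.x on-disk layout; ``foo.zarr`` matches the examples below)::

    import json
    import zarr

    # .zmetadata is one JSON document containing every group's and array's metadata
    store = zarr.DirectoryStore("foo.zarr")
    meta = json.loads(store[".zmetadata"])
    print(meta["zarr_consolidated_format"], list(meta["metadata"])[:3])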

If you have zarr version 2.3 or greater, xarray can write and read stores
with consolidated metadata. To write consolidated metadata, pass the
``consolidated=True`` option to the
:py:attr:`Dataset.to_zarr` method::

    ds.to_zarr('foo.zarr', consolidated=True)

To read a consolidated store, pass the ``consolidated=True`` option to
:py:func:`open_zarr`::
By default, Xarray writes consolidated metadata and attempts to read stores
with consolidated metadata, falling back to non-consolidated metadata for
reads. Because this fallback is so much slower, Xarray issues a
``RuntimeWarning`` with guidance when reading with consolidated metadata fails:

    ds = xr.open_zarr('foo.zarr', consolidated=True)

    Failed to open Zarr store with consolidated metadata, falling back to try
    reading non-consolidated metadata. This is typically much slower for
    opening a dataset. To silence this warning, consider:

Xarray can't perform consolidation on pre-existing zarr datasets. This should
be done directly from zarr, as described in the
`zarr docs <https://zarr.readthedocs.io/en/latest/tutorial.html#consolidating-metadata>`_.
1. Consolidating metadata in this existing store with
:py:func:`zarr.consolidate_metadata`.
2. Explicitly setting ``consolidated=False``, to avoid trying to read
consolidated metadata.
3. Explicitly setting ``consolidated=True``, to raise an error in this case
instead of falling back to try reading non-consolidated metadata.
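In code, the three options might look like this (a sketch; ``foo.zarr`` is illustrative)::

    import xarray as xr
    import zarr

    # 1. Consolidate the metadata of the existing store in place
    zarr.consolidate_metadata("foo.zarr")

    # 2. Skip the consolidated-metadata lookup entirely
    ds = xr.open_zarr("foo.zarr", consolidated=False)

    # 3. Require consolidated metadata; raise instead of falling back
    ds = xr.open_zarr("foo.zarr", consolidated=True)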

.. _io.zarr.appending:

@@ -1062,7 +1064,7 @@ and then calling ``to_zarr`` with ``compute=False`` to write only metadata
    ds = xr.Dataset({"foo": ("x", dummies)})
    path = "path/to/directory.zarr"
    # Now we write the metadata without computing any array values
    ds.to_zarr(path, compute=False, consolidated=True)
    ds.to_zarr(path, compute=False)
Now, a Zarr store with the correct variable shapes and attributes exists that
can be filled out by subsequent calls to ``to_zarr``. The ``region`` provides a
@@ -1072,7 +1074,7 @@ data should be written (in index space, not coordinate space), e.g.,
.. ipython:: python

    # For convenience, we'll slice a single dataset, but in the real use-case
    # we would create them separately, possibly even from separate processes.
    # we would create them separately possibly even from separate processes.
    ds = xr.Dataset({"foo": ("x", np.arange(30))})
    ds.isel(x=slice(0, 10)).to_zarr(path, region={"x": slice(0, 10)})
    ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": slice(10, 20)})
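Once every region has been written, reading the store back should reproduce the full dataset — a hedged check using the same illustrative ``path``::

    result = xr.open_zarr(path)
    print(result.foo.values)  # expect 0..29 once all regions are written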