diff --git a/.binder/environment.yml b/.binder/environment.yml index 6caea42df87..99a7d9f2494 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -23,6 +23,7 @@ dependencies: - netcdf4 - numba - numpy + - packaging - pandas - pint - pip diff --git a/.github/ISSUE_TEMPLATE/bugreport.yml b/.github/ISSUE_TEMPLATE/bugreport.yml index 255c7de07d9..043584f3ea6 100644 --- a/.github/ISSUE_TEMPLATE/bugreport.yml +++ b/.github/ISSUE_TEMPLATE/bugreport.yml @@ -1,8 +1,6 @@ name: Bug Report description: File a bug report to help us improve -title: '[Bug]: ' -labels: [bug, 'needs triage'] -assignees: [] +labels: [bug, "needs triage"] body: - type: textarea id: what-happened @@ -35,14 +33,14 @@ body: Bug reports that follow these guidelines are easier to diagnose, and so are often handled much more quickly. This will be automatically formatted into code, so no need for markdown backticks. - render: python + render: Python - type: textarea id: log-output attributes: label: Relevant log output description: Please copy and paste any relevant output. This will be automatically formatted into code, so no need for markdown backticks. - render: python + render: Python - type: textarea id: extra @@ -54,8 +52,8 @@ body: - type: textarea id: show-versions attributes: - label: Environment - description: | - Paste the output of `xr.show_versions()` here + label: Environment + description: | + Paste the output of `xr.show_versions()` here validations: required: true diff --git a/.github/ISSUE_TEMPLATE/misc.yml b/.github/ISSUE_TEMPLATE/misc.yml new file mode 100644 index 00000000000..94dd2d86567 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/misc.yml @@ -0,0 +1,17 @@ +name: Issue +description: General Issue or discussion topic. For usage questions, please follow the "Usage question" link +labels: ["needs triage"] +body: + - type: markdown + attributes: + value: | + Please describe your issue here. + - type: textarea + id: issue-description + attributes: + label: What is your issue? + description: | + Thank you for filing an issue! Please give us further information on how we can help you. + placeholder: Please describe your issue. 
+ validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/newfeature.yml b/.github/ISSUE_TEMPLATE/newfeature.yml index ec94b0f4b89..77cb15b7d37 100644 --- a/.github/ISSUE_TEMPLATE/newfeature.yml +++ b/.github/ISSUE_TEMPLATE/newfeature.yml @@ -1,8 +1,6 @@ name: Feature Request description: Suggest an idea for xarray -title: '[FEATURE]: ' labels: [enhancement] -assignees: [] body: - type: textarea id: description diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index de506546ac9..6d482445f96 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -16,7 +16,7 @@ jobs: steps: # We need the full repo to avoid this issue # https://github.com/actions/checkout/issues/23 - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 diff --git a/.github/workflows/cancel-duplicate-runs.yaml b/.github/workflows/cancel-duplicate-runs.yaml deleted file mode 100644 index 9f74360b034..00000000000 --- a/.github/workflows/cancel-duplicate-runs.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: Cancel -on: - workflow_run: - workflows: ["CI", "CI Additional", "CI Upstream"] - types: - - requested -jobs: - cancel: - name: Cancel previous runs - runs-on: ubuntu-latest - if: github.repository == 'pydata/xarray' - steps: - - uses: styfle/cancel-workflow-action@0.9.1 - with: - workflow_id: ${{ github.event.workflow.id }} diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index fac4bb133b1..50c95cdebb7 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -8,6 +8,10 @@ on: - "*" workflow_dispatch: # allows you to trigger manually +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: detect-ci-trigger: name: detect ci trigger @@ -18,7 +22,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.1 @@ -49,7 +53,7 @@ jobs: "py39-flaky", ] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. @@ -115,13 +119,13 @@ jobs: doctest: name: Doctests runs-on: "ubuntu-latest" - if: github.repository == 'pydata/xarray' + if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. - uses: conda-incubator/setup-miniconda@v2 @@ -158,7 +162,7 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. 
- uses: conda-incubator/setup-miniconda@v2 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 447507ad25f..4747b5ae20d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,6 +8,10 @@ on: - "*" workflow_dispatch: # allows you to trigger manually +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: detect-ci-trigger: name: detect ci trigger @@ -18,7 +22,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.1 @@ -38,9 +42,9 @@ jobs: matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] # Bookend python versions - python-version: ["3.8", "3.9"] + python-version: ["3.8", "3.9", "3.10"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Set environment variables @@ -111,6 +115,7 @@ jobs: event_file: name: "Event File" runs-on: ubuntu-latest + if: github.repository == 'pydata/xarray' steps: - name: Upload uses: actions/upload-artifact@v2 diff --git a/.github/workflows/publish-test-results.yaml b/.github/workflows/publish-test-results.yaml index a2e02c28f5a..ba77c1fec3c 100644 --- a/.github/workflows/publish-test-results.yaml +++ b/.github/workflows/publish-test-results.yaml @@ -8,6 +8,10 @@ on: types: - completed +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: publish-test-results: name: Publish test results diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 7bc35952729..c88cf556a50 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -12,10 +12,10 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'pydata/xarray' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v3 name: Install Python with: python-version: 3.8 @@ -50,7 +50,7 @@ jobs: needs: build-artifacts runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v3 name: Install Python with: python-version: 3.8 @@ -62,6 +62,14 @@ jobs: run: | ls -ltrh ls -ltrh dist + + - name: Verify the built dist/wheel is valid + if: github.event_name == 'push' + run: | + python -m pip install --upgrade pip + python -m pip install dist/xarray*.whl + python -m xarray.util.print_versions + - name: Publish package to TestPyPI if: github.event_name == 'push' uses: pypa/gh-action-pypi-publish@v1.5.0 @@ -71,13 +79,6 @@ jobs: repository_url: https://test.pypi.org/legacy/ verbose: true - - name: Check uploaded package - if: github.event_name == 'push' - run: | - sleep 3 - python -m pip install --upgrade pip - python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade xarray - python -m xarray.util.print_versions upload-to-pypi: needs: test-built-dist diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 49415683d07..6091306ed8b 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -10,6 +10,10 @@ on: - cron: "0 0 * * *" # Daily “At 00:00” UTC workflow_dispatch: # allows you to trigger the workflow run manually +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: detect-ci-trigger: name: detect 
upstream-dev ci trigger @@ -20,7 +24,7 @@ jobs: outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.1 @@ -44,11 +48,11 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9"] + python-version: ["3.10"] outputs: artifacts_availability: ${{ steps.status.outputs.ARTIFACTS_AVAILABLE }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for all branches and tags. - uses: conda-incubator/setup-miniconda@v2 @@ -106,8 +110,8 @@ jobs: run: shell: bash steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 with: python-version: "3.x" - uses: actions/download-artifact@v2 @@ -122,7 +126,7 @@ jobs: shopt -s globstar python .github/workflows/parse_logs.py logs/**/*-log - name: Report failures - uses: actions/github-script@v5 + uses: actions/github-script@v6 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/.gitignore b/.gitignore index 90f4a10ed5f..686c7efa701 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,9 @@ __pycache__ .hypothesis/ # temp files from docs build +doc/*.nc doc/auto_gallery -doc/example.nc +doc/rasm.zarr doc/savefig # C extensions @@ -72,4 +73,3 @@ xarray/tests/data/*.grib.*.idx Icon* .ipynb_checkpoints -doc/rasm.zarr diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ecc69e5783a..63a2871b496 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,14 +6,27 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - # isort should run before black as black sometimes tweaks the isort output + - id: debug-statements + - id: mixed-line-ending + # This wants to go before isort & flake8 + - repo: https://github.com/myint/autoflake + rev: "v1.4" + hooks: + - id: autoflake # isort should run before black as black sometimes tweaks the isort output + args: ["--in-place", "--ignore-init-module-imports"] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort + - repo: https://github.com/asottile/pyupgrade + rev: v2.31.0 + hooks: + - id: pyupgrade + args: + - "--py38-plus" # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black - rev: 21.12b0 + rev: 22.1.0 hooks: - id: black - id: black-jupyter @@ -35,9 +48,11 @@ repos: rev: v0.931 hooks: - id: mypy - # `properies` & `asv_bench` are copied from setup.cfg. 
- # `_typed_ops.py` is added since otherwise mypy will complain (but notably only in pre-commit) - exclude: "properties|asv_bench|_typed_ops.py" + # Copied from setup.cfg + exclude: "properties|asv_bench" + # This is slow and so we take it out of the fast-path; requires passing + # `--hook-stage manual` to pre-commit + stages: [manual] additional_dependencies: [ # Type stubs types-python-dateutil, @@ -47,12 +62,3 @@ repos: typing-extensions==3.10.0.0, numpy, ] - # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194 - # - repo: https://github.com/asottile/pyupgrade - # rev: v1.22.1 - # hooks: - # - id: pyupgrade - # args: - # - "--py3-only" - # # remove on f-strings in Py3.7 - # - "--keep-percent-format" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7a909aefd08..dd9931f907b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1 +1 @@ -Xarray's contributor guidelines [can be found in our online documentation](http://xarray.pydata.org/en/stable/contributing.html) +Xarray's contributor guidelines [can be found in our online documentation](http://docs.xarray.dev/en/stable/contributing.html) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 16dc3b94196..8d82277ae55 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -18,65 +18,47 @@ upstream https://github.com/pydata/xarray (push) git switch main git pull upstream main ``` - 2. Confirm there are no commits on stable that are not yet merged - ([ref](https://github.com/pydata/xarray/pull/4440)): - ```sh - git merge upstream/stable - ``` - 3. Add a list of contributors with: + 2. Add a list of contributors with: ```sh - git log "$(git tag --sort="v:refname" | tail -1).." --format=%aN | sort -u | perl -pe 's/\n/$1, /' + git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | perl -pe 's/\n/$1, /' ``` This will return the number of contributors: ```sh - git log $(git tag --sort="v:refname" | tail -1).. --format=%aN | sort -u | wc -l + git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | wc -l ``` - 4. Write a release summary: ~50 words describing the high level features. This + 3. Write a release summary: ~50 words describing the high level features. This will be used in the release emails, tweets, GitHub release notes, etc. - 5. Look over whats-new.rst and the docs. Make sure "What's New" is complete + 4. Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and add the release summary at the top. Things to watch out for: - Important new features should be highlighted towards the top. - Function/method references should include links to the API docs. - Sometimes notes get added in the wrong section of whats-new, typically due to a bad merge. Check for these before a release by using git diff, - e.g., `git diff v{0.X.Y-1} whats-new.rst` where {0.X.Y-1} is the previous + e.g., `git diff v{YYYY.MM.X-1} whats-new.rst` where {YYYY.MM.X-1} is the previous release. - 6. Open a PR with the release summary and whatsnew changes; in particular the + 5. Open a PR with the release summary and whatsnew changes; in particular the release headline should get feedback from the team on what's important to include. - 7. After merging, again ensure your main branch is synced to upstream: + 6. After merging, again ensure your main branch is synced to upstream: ```sh git pull upstream main ``` - 8. If you have any doubts, run the full test suite one final time! + 7. If you have any doubts, run the full test suite one final time! 
```sh pytest ``` - 9. Check that the ReadTheDocs build is passing. -10. Issue the release on GitHub. Click on "Draft a new release" at + 8. Check that the ReadTheDocs build is passing on the `main` branch. + 9. Issue the release on GitHub. Click on "Draft a new release" at . Type in the version number (with a "v") and paste the release summary in the notes. -11. This should automatically trigger an upload of the new build to PyPI via GitHub Actions. + 10. This should automatically trigger an upload of the new build to PyPI via GitHub Actions. Check this has run [here](https://github.com/pydata/xarray/actions/workflows/pypi-release.yaml), and that the version number you expect is displayed [on PyPI](https://pypi.org/project/xarray/) -12. Update the stable branch (used by ReadTheDocs) and switch back to main: - ```sh - git switch stable - git rebase main - git push --force upstream stable - git switch main - ``` - You may need to first fetch it with `git fetch upstream`, - and check out a local version with `git checkout -b stable upstream/stable`. - - It's OK to force push to `stable` if necessary. (We also update the stable - branch with `git cherry-pick` for documentation only fixes that apply the - current released version.) -13. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst: +11. Add a section for the next release {YYYY.MM.X+1} to doc/whats-new.rst: ```rst - .. _whats-new.0.X.Y+1: + .. _whats-new.YYYY.MM.X+1: - v0.X.Y+1 (unreleased) + vYYYY.MM.X+1 (unreleased) --------------------- New Features @@ -103,17 +85,14 @@ upstream https://github.com/pydata/xarray (push) ~~~~~~~~~~~~~~~~ ``` -14. Commit your changes and push to main again: +12. Commit your changes and push to main again: ```sh git commit -am 'New whatsnew section' git push upstream main ``` You're done pushing to main! -15. Update the docs. Login to - and switch your new release tag (at the bottom) from "Inactive" to "Active". - It should now build automatically. -16. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I +13. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com @@ -130,11 +109,6 @@ upstream https://github.com/pydata/xarray (push) ## Note on version numbering -We follow a rough approximation of semantic version. Only major releases (0.X.0) -should include breaking changes. Minor releases (0.X.Y) are for bug fixes and -backwards compatible new features, but if a sufficient number of new features -have arrived we will issue a major release even if there are no compatibility -breaks. - -Once the project reaches a sufficient level of maturity for a 1.0.0 release, we -intend to follow semantic versioning more strictly. +As of 2022.03.0, we utilize the [CALVER](https://calver.org/) version system. +Specifically, we have adopted the pattern `YYYY.MM.X`, where `YYYY` is a 4-digit +year (e.g. `2022`), `MM` is a 2-digit zero-padded month (e.g. `01` for January), and `X` is the release number (starting at zero at the start of each month and incremented once for each additional release). diff --git a/README.rst b/README.rst index f58b0002b62..7a4ad4e1f9f 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ xarray: N-D labeled arrays and datasets .. image:: https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg :target: https://codecov.io/gh/pydata/xarray .. 
image:: https://readthedocs.org/projects/xray/badge/?version=latest - :target: https://xarray.pydata.org/ + :target: https://docs.xarray.dev/ .. image:: https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat :target: https://pandas.pydata.org/speed/xarray/ .. image:: https://img.shields.io/pypi/v/xarray.svg @@ -69,12 +69,12 @@ powerful and concise interface. For example: Documentation ------------- -Learn more about xarray in its official documentation at https://xarray.pydata.org/ +Learn more about xarray in its official documentation at https://docs.xarray.dev/ Contributing ------------ -You can find information about contributing to xarray at our `Contributing page `_. +You can find information about contributing to xarray at our `Contributing page `_. Get in touch ------------ diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 9f96a2ce021..10b8aead374 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -7,7 +7,7 @@ "project": "xarray", // The project's homepage - "project_url": "http://xarray.pydata.org/", + "project_url": "http://docs.xarray.dev/", // The URL or local path of the source code repository for the // project being benchmarked diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index b021d9de16d..ff5615c17c6 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -11,6 +11,7 @@ conda uninstall -y --force \ zarr \ cftime \ rasterio \ + packaging \ pint \ bottleneck \ sparse \ @@ -40,7 +41,8 @@ python -m pip install \ git+https://github.com/dask/distributed \ git+https://github.com/zarr-developers/zarr \ git+https://github.com/Unidata/cftime \ - git+https://github.com/mapbox/rasterio \ + git+https://github.com/rasterio/rasterio \ + git+https://github.com/pypa/packaging \ git+https://github.com/hgrecco/pint \ git+https://github.com/pydata/bottleneck \ git+https://github.com/pydata/sparse \ diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index a27a26d5cac..e5fcc500f70 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -19,6 +19,7 @@ dependencies: - netcdf4>=1.5 - numba - numpy>=1.17 + - packaging>=20.0 - pandas>=1.0 - pooch - pip diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index ca55e5b4cb9..a31188fec5b 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -8,7 +8,7 @@ dependencies: # - cdms2 # Not available on Windows # - cfgrib # Causes Python interpreter crash on Windows: https://github.com/pydata/xarray/pull/3340 - cftime - - dask-core != 2021.12.0 # https://github.com/pydata/xarray/pull/6111, can remove on next release + - dask-core - distributed - fsspec!=2021.7.0 - h5netcdf @@ -23,12 +23,13 @@ dependencies: - numba - numpy - numpy_groupies + - packaging - pandas - pint - pip - pre-commit - pseudonetcdf - - pydap + # - pydap # https://github.com/pydap/pydap/pull/210 # - pynio # Not available on Windows - pytest - pytest-cov diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index b006c33e611..1ac4f87d34a 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -10,7 +10,7 @@ dependencies: - cdms2 - cfgrib - cftime - - dask-core != 2021.12.0 # https://github.com/pydata/xarray/pull/6111, can remove on next release + - dask-core - distributed - fsspec!=2021.7.0 - h5netcdf @@ -26,17 +26,19 @@ dependencies: - numexpr - numpy - 
numpy_groupies + - packaging - pandas - pint - pip - pooch - pre-commit - pseudonetcdf - - pydap + # - pydap # https://github.com/pydap/pydap/pull/210 # - pynio: not compatible with netCDF4>1.5.3; only tested in py37-bare-minimum - pytest - pytest-cov - pytest-env + - pytest-github-actions-annotate-failures - pytest-xdist - rasterio - scipy diff --git a/ci/requirements/py38-bare-minimum.yml b/ci/requirements/py38-bare-minimum.yml index c6e3ac504a8..5986ec7186b 100644 --- a/ci/requirements/py38-bare-minimum.yml +++ b/ci/requirements/py38-bare-minimum.yml @@ -11,4 +11,5 @@ dependencies: - pytest-env - pytest-xdist - numpy=1.18 + - packaging=20.0 - pandas=1.1 diff --git a/ci/requirements/py38-min-all-deps.yml b/ci/requirements/py38-min-all-deps.yml index a6459b92ccb..76e2b28093d 100644 --- a/ci/requirements/py38-min-all-deps.yml +++ b/ci/requirements/py38-min-all-deps.yml @@ -33,6 +33,7 @@ dependencies: - netcdf4=1.5.3 - numba=0.51 - numpy=1.18 + - packaging=20.0 - pandas=1.1 - pint=0.16 - pip diff --git a/ci/requirements/py39-all-but-dask.yml b/ci/requirements/py39-all-but-dask.yml index a66a6a50d65..f05745ee1fa 100644 --- a/ci/requirements/py39-all-but-dask.yml +++ b/ci/requirements/py39-all-but-dask.yml @@ -25,6 +25,7 @@ dependencies: - numba - numpy - numpy_groupies + - packaging - pandas - pint - pip diff --git a/design_notes/flexible_indexes_notes.md b/design_notes/flexible_indexes_notes.md index c7eb718720c..b36ce3e46ed 100644 --- a/design_notes/flexible_indexes_notes.md +++ b/design_notes/flexible_indexes_notes.md @@ -133,7 +133,7 @@ A possible, more explicit solution to reuse a `pandas.MultiIndex` in a DataArray New indexes may also be built from existing sets of coordinates or variables in a Dataset/DataArray using the `.set_index()` method. -The [current signature](http://xarray.pydata.org/en/stable/generated/xarray.DataArray.set_index.html#xarray.DataArray.set_index) of `.set_index()` is tailored to `pandas.MultiIndex` and tied to the concept of a dimension-index. It is therefore hardly reusable as-is in the context of flexible indexes proposed here. +The [current signature](http://docs.xarray.dev/en/stable/generated/xarray.DataArray.set_index.html#xarray.DataArray.set_index) of `.set_index()` is tailored to `pandas.MultiIndex` and tied to the concept of a dimension-index. It is therefore hardly reusable as-is in the context of flexible indexes proposed here. The new signature may look like one of these: diff --git a/doc/README.rst b/doc/README.rst index 0579f85d85f..c1b6c63a4c0 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -3,4 +3,4 @@ xarray ------ -You can find information about building the docs at our `Contributing page `_. +You can find information about building the docs at our `Contributing page `_. 
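
A minimal sketch, not part of the patch above: the `packaging>=20.0` requirement added across the environment files is the kind of dependency typically used for PEP 440-aware runtime version checks. The assumption that xarray pins it to replace ad-hoc version comparisons (e.g. `distutils.version.LooseVersion`) is mine and is not stated in the diff; the threshold below simply mirrors the `pandas>=1.1` pin shown in the requirement files.

```python
# Illustrative only: version comparison with the newly pinned `packaging` dependency.
from packaging.version import Version

import pandas as pd

# Hypothetical feature gate; "1.1" echoes the minimum pandas version in the pins above.
if Version(pd.__version__) >= Version("1.1"):
    print("pandas is new enough")
else:
    print("pandas is too old")
```
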
diff --git a/doc/api.rst b/doc/api.rst index b552bc6b4d2..d2c222da4db 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -106,6 +106,7 @@ Dataset contents Dataset.swap_dims Dataset.expand_dims Dataset.drop_vars + Dataset.drop_duplicates Dataset.drop_dims Dataset.set_coords Dataset.reset_coords diff --git a/doc/conf.py b/doc/conf.py index 93174c6aaec..8ce9efdce88 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -28,9 +28,9 @@ print("python exec:", sys.executable) print("sys.path:", sys.path) -if "conda" in sys.executable: +if "CONDA_DEFAULT_ENV" in os.environ or "conda" in sys.executable: print("conda environment:") - subprocess.run(["conda", "list"]) + subprocess.run([os.environ.get("CONDA_EXE", "conda"), "list"]) else: print("pip environment:") subprocess.run([sys.executable, "-m", "pip", "list"]) @@ -260,12 +260,12 @@ # configuration for sphinxext.opengraph -ogp_site_url = "https://xarray.pydata.org/en/latest/" -ogp_image = "https://xarray.pydata.org/en/stable/_static/dataset-diagram-logo.png" +ogp_site_url = "https://docs.xarray.dev/en/latest/" +ogp_image = "https://docs.xarray.dev/en/stable/_static/dataset-diagram-logo.png" ogp_custom_meta_tags = [ '', '', - '', + '', ] # Redirects for pages that were moved to new locations diff --git a/doc/contributing.rst b/doc/contributing.rst index f5653fcc65e..0913702fd83 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -95,14 +95,14 @@ version control to allow many people to work together on the project. Some great resources for learning Git: -* the `GitHub help pages `_. -* the `NumPy's documentation `_. -* Matthew Brett's `Pydagogue `_. +* the `GitHub help pages `_. +* the `NumPy's documentation `_. +* Matthew Brett's `Pydagogue `_. Getting started with Git ------------------------ -`GitHub has instructions `__ for installing git, +`GitHub has instructions `__ for installing git, setting up your SSH key, and configuring git. All these steps need to be completed before you can work seamlessly between your local repository and GitHub. @@ -274,13 +274,13 @@ Some other important things to know about the docs: .. ipython:: python x = 2 - x ** 3 + x**3 will be rendered as:: In [1]: x = 2 - In [2]: x ** 3 + In [2]: x**3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -455,7 +455,7 @@ it is worth getting in the habit of writing tests ahead of time so that this is Like many packages, *xarray* uses `pytest `_ and the convenient extensions in `numpy.testing -`_. +`_. Writing tests ~~~~~~~~~~~~~ @@ -855,15 +855,15 @@ GitHub. To delete it there do:: PR checklist ------------ -- **Properly comment and document your code.** See `"Documenting your code" `_. -- **Test that the documentation builds correctly** by typing ``make html`` in the ``doc`` directory. This is not strictly necessary, but this may be easier than waiting for CI to catch a mistake. See `"Contributing to the documentation" `_. +- **Properly comment and document your code.** See `"Documenting your code" `_. +- **Test that the documentation builds correctly** by typing ``make html`` in the ``doc`` directory. This is not strictly necessary, but this may be easier than waiting for CI to catch a mistake. See `"Contributing to the documentation" `_. - **Test your code**. - - Write new tests if needed. See `"Test-driven development/code writing" `_. + - Write new tests if needed. See `"Test-driven development/code writing" `_. - Test the code using `Pytest `_. 
Running all tests (type ``pytest`` in the root directory) takes a while, so feel free to only run the tests you think are needed based on your PR (example: ``pytest xarray/tests/test_dataarray.py``). CI will catch any failing tests. - By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a [test-upstream] tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a "[skip-ci]" tag to the first line of the commit message. -- **Properly format your code** and verify that it passes the formatting guidelines set by `Black `_ and `Flake8 `_. See `"Code formatting" `_. You can use `pre-commit `_ to run these automatically on each commit. +- **Properly format your code** and verify that it passes the formatting guidelines set by `Black `_ and `Flake8 `_. See `"Code formatting" `_. You can use `pre-commit `_ to run these automatically on each commit. - Run ``pre-commit run --all-files`` in the root directory. This may modify some files. Confirm and commit any formatting changes. diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst index a9cbf39b644..2b49b1529e1 100644 --- a/doc/ecosystem.rst +++ b/doc/ecosystem.rst @@ -17,15 +17,16 @@ Geosciences - `climpred `_: Analysis of ensemble forecast models for climate prediction. - `geocube `_: Tool to convert geopandas vector data into rasterized xarray data. - `GeoWombat `_: Utilities for analysis of remotely sensed and gridded raster data at scale (easily tame Landsat, Sentinel, Quickbird, and PlanetScope). +- `gsw-xarray `_: a wrapper around `gsw `_ that adds CF compliant attributes when possible, units, name. - `infinite-diff `_: xarray-based finite-differencing, focused on gridded climate/meteorology data - `marc_analysis `_: Analysis package for CESM/MARC experiments and output. - `MetPy `_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data. -- `MPAS-Analysis `_: Analysis for simulations produced with Model for Prediction Across Scales (MPAS) components and the Accelerated Climate Model for Energy (ACME). -- `OGGM `_: Open Global Glacier Model +- `MPAS-Analysis `_: Analysis for simulations produced with Model for Prediction Across Scales (MPAS) components and the Accelerated Climate Model for Energy (ACME). +- `OGGM `_: Open Global Glacier Model - `Oocgcm `_: Analysis of large gridded geophysical datasets - `Open Data Cube `_: Analysis toolkit of continental scale Earth Observation data from satellites. - `Pangaea: `_: xarray extension for gridded land surface & weather model output). -- `Pangeo `_: A community effort for big data geoscience in the cloud. +- `Pangeo `_: A community effort for big data geoscience in the cloud. - `PyGDX `_: Python 3 package for accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom subclass. @@ -41,13 +42,13 @@ Geosciences - `wradlib `_: An Open Source Library for Weather Radar Data Processing. - `wrf-python `_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model. - `xarray-simlab `_: xarray extension for computer model simulations. -- `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.) -- `xarray-topo `_: xarray extension for topographic analysis and modelling. 
+- `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.) +- `xarray-topo `_: xarray extension for topographic analysis and modelling. - `xbpch `_: xarray interface for bpch files. - `xclim `_: A library for calculating climate science indices with unit handling built from xarray and dask. - `xESMF `_: Universal regridder for geospatial data. - `xgcm `_: Extends the xarray data model to understand finite volume grid cells (common in General Circulation Models) and provides interpolation and difference operations for such grids. -- `xmitgcm `_: a python package for reading `MITgcm `_ binary MDS files into xarray data structures. +- `xmitgcm `_: a python package for reading `MITgcm `_ binary MDS files into xarray data structures. - `xnemogcm `_: a package to read `NEMO `_ output files and add attributes to interface with xgcm. Machine Learning @@ -57,6 +58,7 @@ Machine Learning - `Elm `_: Parallel machine learning on xarray data structures - `sklearn-xarray (1) `_: Combines scikit-learn and xarray (1). - `sklearn-xarray (2) `_: Combines scikit-learn and xarray (2). +- `xbatcher `_: Batch Generation from Xarray Datasets. Other domains ~~~~~~~~~~~~~ @@ -90,7 +92,7 @@ Visualization Non-Python projects ~~~~~~~~~~~~~~~~~~~ -- `xframe `_: C++ data structures inspired by xarray. +- `xframe `_: C++ data structures inspired by xarray. - `AxisArrays `_ and `NamedArrays `_: similar data structures for Julia. diff --git a/doc/examples/ROMS_ocean_model.ipynb b/doc/examples/ROMS_ocean_model.ipynb index 82d7a8d58af..d5c76380525 100644 --- a/doc/examples/ROMS_ocean_model.ipynb +++ b/doc/examples/ROMS_ocean_model.ipynb @@ -77,7 +77,7 @@ "ds = xr.tutorial.open_dataset(\"ROMS_example.nc\", chunks={\"ocean_time\": 1})\n", "\n", "# This is a way to turn on chunking and lazy evaluation. Opening with mfdataset, or\n", - "# setting the chunking in the open_dataset would also achive this.\n", + "# setting the chunking in the open_dataset would also achieve this.\n", "ds" ] }, diff --git a/doc/gallery.rst b/doc/gallery.rst index 9e5284cc2ee..36eb39d1a53 100644 --- a/doc/gallery.rst +++ b/doc/gallery.rst @@ -116,7 +116,7 @@ External Examples --- :img-top: https://github.com/avatars/u/60833341?s=200&v=4 ++++ - .. link-button:: http://gallery.pangeo.io/ + .. 
link-button:: https://gallery.pangeo.io/ :type: url :text: Xarray and dask on the cloud with Pangeo :classes: btn-outline-dark btn-block stretched-link diff --git a/doc/gallery/plot_colorbar_center.py b/doc/gallery/plot_colorbar_center.py index 42d6448adf6..da3447a1f25 100644 --- a/doc/gallery/plot_colorbar_center.py +++ b/doc/gallery/plot_colorbar_center.py @@ -38,6 +38,6 @@ ax4.set_title("Celsius: center=False") ax4.set_ylabel("") -# Mke it nice +# Make it nice plt.tight_layout() plt.show() diff --git a/doc/gallery/plot_rasterio.py b/doc/gallery/plot_rasterio.py index 8294e01975f..853923a38bd 100644 --- a/doc/gallery/plot_rasterio.py +++ b/doc/gallery/plot_rasterio.py @@ -23,7 +23,7 @@ import xarray as xr # Read the data -url = "https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif" +url = "https://github.com/rasterio/rasterio/raw/master/tests/data/RGB.byte.tif" da = xr.open_rasterio(url) # Compute the lon/lat coordinates with pyproj diff --git a/doc/gallery/plot_rasterio_rgb.py b/doc/gallery/plot_rasterio_rgb.py index 758d4cd3c37..912224ac132 100644 --- a/doc/gallery/plot_rasterio_rgb.py +++ b/doc/gallery/plot_rasterio_rgb.py @@ -18,7 +18,7 @@ import xarray as xr # Read the data -url = "https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif" +url = "https://github.com/rasterio/rasterio/raw/master/tests/data/RGB.byte.tif" da = xr.open_rasterio(url) # The data is in UTM projection. We have to set it manually until diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst index d6e1c812fb2..0eeb09c432c 100644 --- a/doc/getting-started-guide/faq.rst +++ b/doc/getting-started-guide/faq.rst @@ -136,7 +136,7 @@ With xarray, we draw a firm line between labels that the library understands example, we do not automatically interpret and enforce units or `CF conventions`_. (An exception is serialization to and from netCDF files.) -.. _CF conventions: http://cfconventions.org/latest.html +.. _CF conventions: https://cfconventions.org/latest.html An implication of this choice is that we do not propagate ``attrs`` through most operations unless explicitly flagged (some methods have a ``keep_attrs`` @@ -155,7 +155,7 @@ xarray, and have contributed a number of improvements and fixes upstream. Xarray does not yet support all of netCDF4-python's features, such as modifying files on-disk. -__ https://github.com/Unidata/netcdf4-python +__ https://unidata.github.io/netcdf4-python/ Iris_ (supported by the UK Met office) provides similar tools for in- memory manipulation of labeled arrays, aimed specifically at weather and @@ -166,13 +166,13 @@ different approaches to handling metadata: Iris strictly interprets integration with Cartopy_. .. _Iris: https://scitools-iris.readthedocs.io/en/stable/ -.. _Cartopy: http://scitools.org.uk/cartopy/docs/latest/ +.. _Cartopy: https://scitools.org.uk/cartopy/docs/latest/ `UV-CDAT`__ is another Python library that implements in-memory netCDF-like variables and `tools for working with climate data`__. -__ http://uvcdat.llnl.gov/ -__ http://drclimate.wordpress.com/2014/01/02/a-beginners-guide-to-scripting-with-uv-cdat/ +__ https://uvcdat.llnl.gov/ +__ https://drclimate.wordpress.com/2014/01/02/a-beginners-guide-to-scripting-with-uv-cdat/ We think the design decisions we have made for xarray (namely, basing it on pandas) make it a faster and more flexible data analysis tool. That said, Iris @@ -197,7 +197,7 @@ would certainly appreciate it. We recommend two citations. - Hoyer, S. & Hamman, J., (2017). 
xarray: N-D labeled Arrays and Datasets in Python. Journal of Open Research Software. 5(1), p.10. - DOI: http://doi.org/10.5334/jors.148 + DOI: https://doi.org/10.5334/jors.148 Here’s an example of a BibTeX entry:: @@ -210,7 +210,7 @@ would certainly appreciate it. We recommend two citations. year = {2017}, publisher = {Ubiquity Press}, doi = {10.5334/jors.148}, - url = {http://doi.org/10.5334/jors.148} + url = {https://doi.org/10.5334/jors.148} } 2. You may also want to cite a specific version of the xarray package. We diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index 6f437a2dc4c..6177ba0aaac 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -8,6 +8,7 @@ Required dependencies - Python (3.8 or later) - `numpy `__ (1.18 or later) +- `packaging `__ (20.0 or later) - `pandas `__ (1.1 or later) .. _optional-dependencies: @@ -26,21 +27,21 @@ For netCDF and IO - `netCDF4 `__: recommended if you want to use xarray for reading or writing netCDF files -- `scipy `__: used as a fallback for reading/writing netCDF3 -- `pydap `__: used as a fallback for accessing OPeNDAP -- `h5netcdf `__: an alternative library for +- `scipy `__: used as a fallback for reading/writing netCDF3 +- `pydap `__: used as a fallback for accessing OPeNDAP +- `h5netcdf `__: an alternative library for reading and writing netCDF4 files that does not use the netCDF-C libraries - `PyNIO `__: for reading GRIB and other geoscience specific file formats. Note that PyNIO is not available for Windows and that the PyNIO backend may be moved outside of xarray in the future. -- `zarr `__: for chunked, compressed, N-dimensional arrays. +- `zarr `__: for chunked, compressed, N-dimensional arrays. - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before year 1678 or after year 2262. - `PseudoNetCDF `__: recommended for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files (ffi1001) and many other. -- `rasterio `__: for reading GeoTiffs and +- `rasterio `__: for reading GeoTiffs and other gridded raster datasets. - `iris `__: for conversion to and from iris' Cube objects @@ -50,26 +51,26 @@ For netCDF and IO For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ -- `scipy `__: necessary to enable the interpolation features for +- `scipy `__: necessary to enable the interpolation features for xarray objects - `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor -- `numbagg `_: for exponential rolling +- `numbagg `_: for exponential rolling window operations For parallel computing ~~~~~~~~~~~~~~~~~~~~~~ -- `dask.array `__: required for :ref:`dask`. +- `dask.array `__: required for :ref:`dask`. For plotting ~~~~~~~~~~~~ -- `matplotlib `__: required for :ref:`plotting` -- `cartopy `__: recommended for :ref:`plot-maps` -- `seaborn `__: for better +- `matplotlib `__: required for :ref:`plotting` +- `cartopy `__: recommended for :ref:`plot-maps` +- `seaborn `__: for better color palettes -- `nc-time-axis `__: for plotting +- `nc-time-axis `__: for plotting cftime.datetime objects Alternative data containers @@ -114,11 +115,11 @@ with its recommended dependencies using the conda command line tool:: $ conda install -c conda-forge xarray dask netCDF4 bottleneck -.. _conda: http://conda.io/ +.. _conda: https://docs.conda.io If you require other :ref:`optional-dependencies` add them to the line above. 
-We recommend using the community maintained `conda-forge `__ channel, +We recommend using the community maintained `conda-forge `__ channel, as some of the dependencies are difficult to build. New releases may also appear in conda-forge before being updated in the default channel. diff --git a/doc/getting-started-guide/quick-overview.rst b/doc/getting-started-guide/quick-overview.rst index 5bb5bb88ad3..ee13fea8bf1 100644 --- a/doc/getting-started-guide/quick-overview.rst +++ b/doc/getting-started-guide/quick-overview.rst @@ -69,7 +69,7 @@ Unlike positional indexing, label-based indexing frees us from having to know ho Attributes ---------- -While you're setting up your DataArray, it's often a good idea to set metadata attributes. A useful choice is to set ``data.attrs['long_name']`` and ``data.attrs['units']`` since xarray will use these, if present, to automatically label your plots. These special names were chosen following the `NetCDF Climate and Forecast (CF) Metadata Conventions `_. ``attrs`` is just a Python dictionary, so you can assign anything you wish. +While you're setting up your DataArray, it's often a good idea to set metadata attributes. A useful choice is to set ``data.attrs['long_name']`` and ``data.attrs['units']`` since xarray will use these, if present, to automatically label your plots. These special names were chosen following the `NetCDF Climate and Forecast (CF) Metadata Conventions `_. ``attrs`` is just a Python dictionary, so you can assign anything you wish. .. ipython:: python @@ -215,13 +215,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python ds.to_netcdf("example.nc") - xr.open_dataset("example.nc") + reopened = xr.open_dataset("example.nc") + reopened .. ipython:: python :suppress: import os + reopened.close() os.remove("example.nc") diff --git a/doc/index.rst b/doc/index.rst index cffa450b6e8..c549c33aa62 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,10 +17,10 @@ It is particularly tailored to working with netCDF_ files, which were the source of xarray's data model, and integrates tightly with dask_ for parallel computing. -.. _NumPy: http://www.numpy.org -.. _pandas: http://pandas.pydata.org -.. _dask: http://dask.org -.. _netCDF: http://www.unidata.ucar.edu/software/netcdf +.. _NumPy: https://www.numpy.org +.. _pandas: https://pandas.pydata.org +.. _dask: https://dask.org +.. _netCDF: https://www.unidata.ucar.edu/software/netcdf .. toctree:: @@ -98,7 +98,7 @@ Hoyer, Alex Kleeman and Eugene Brevdo and was released as open source in May 2014. The project was renamed from "xray" in January 2016. Xarray became a fiscally sponsored project of NumFOCUS_ in August 2018. -__ http://climate.com/ +__ https://climate.com/ .. _NumFOCUS: https://numfocus.org License @@ -106,4 +106,4 @@ License Xarray is available under the open source `Apache License`__. -__ http://www.apache.org/licenses/LICENSE-2.0.html +__ https://www.apache.org/licenses/LICENSE-2.0.html diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index 22216997273..506a8eb21be 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -172,6 +172,7 @@ Xarray :py:meth:`~xarray.open_dataset`, and returns a boolean. Decoders ^^^^^^^^ + The decoders implement specific operations to transform data from on-disk representation to Xarray representation. @@ -199,6 +200,11 @@ performs the inverse transformation. 
In the following an example on how to use the coders ``decode`` method: +.. ipython:: python + :suppress: + + import xarray as xr + .. ipython:: python var = xr.Variable( @@ -239,7 +245,7 @@ interface only the boolean keywords related to the supported decoders. .. _RST backend_registration: How to register a backend -+++++++++++++++++++++++++++ ++++++++++++++++++++++++++ Define a new entrypoint in your ``setup.py`` (or ``setup.cfg``) with: @@ -273,15 +279,16 @@ If you are using `Poetry `_ for your build system, y .. code-block:: toml - [tool.poetry.plugins."xarray_backends"] + [tool.poetry.plugins."xarray.backends"] "my_engine" = "my_package.my_module:MyBackendEntryClass" See https://python-poetry.org/docs/pyproject/#plugins for more information on Poetry plugins. .. _RST lazy_loading: -How to support Lazy Loading +How to support lazy loading +++++++++++++++++++++++++++ + If you want to make your backend effective with big datasets, then you should support lazy loading. Basically, you shall replace the :py:class:`numpy.ndarray` inside the @@ -311,15 +318,13 @@ The BackendArray subclass shall implement the following method and attributes: - the ``shape`` attribute - the ``dtype`` attribute. - -Xarray supports different type of -`indexing `__, that can be +Xarray supports different type of :doc:`/user-guide/indexing`, that can be grouped in three types of indexes :py:class:`~xarray.core.indexing.BasicIndexer`, :py:class:`~xarray.core.indexing.OuterIndexer` and :py:class:`~xarray.core.indexing.VectorizedIndexer`. This implies that the implementation of the method ``__getitem__`` can be tricky. -In oder to simplify this task, Xarray provides a helper function, +In order to simplify this task, Xarray provides a helper function, :py:func:`~xarray.core.indexing.explicit_indexing_adapter`, that transforms all the input ``indexer`` types (`basic`, `outer`, `vectorized`) in a tuple which is interpreted correctly by your backend. @@ -340,8 +345,8 @@ This is an example ``BackendArray`` subclass implementation: # other backend specific keyword arguments ): self.shape = shape - self.dtype = lock - self.lock = dtype + self.dtype = dtype + self.lock = lock def __getitem__( self, key: xarray.core.indexing.ExplicitIndexer @@ -372,7 +377,7 @@ input the ``key``, the array ``shape`` and the following parameters: For more details see :py:class:`~xarray.core.indexing.IndexingSupport` and :ref:`RST indexing`. -In order to support `Dask `__ distributed and +In order to support `Dask Distributed `__ and :py:mod:`multiprocessing`, ``BackendArray`` subclass should be serializable either with :ref:`io.pickle` or `cloudpickle `__. @@ -382,8 +387,9 @@ opening files, we therefore suggest to use the helper class provided by Xarray .. _RST indexing: -Indexing Examples +Indexing examples ^^^^^^^^^^^^^^^^^ + **BASIC** In the ``BASIC`` indexing support, numbers and slices are supported. @@ -428,7 +434,7 @@ The ``OUTER_1VECTOR`` indexing shall supports number, slices and at most one list. The behaviour with the list shall be the same of ``OUTER`` indexing. If you support more complex indexing as `explicit indexing` or -`numpy indexing`, you can have a look to the implemetation of Zarr backend and Scipy backend, +`numpy indexing`, you can have a look to the implementation of Zarr backend and Scipy backend, currently available in :py:mod:`~xarray.backends` module. .. _RST preferred_chunks: @@ -436,7 +442,7 @@ currently available in :py:mod:`~xarray.backends` module. 
Backend preferred chunks ^^^^^^^^^^^^^^^^^^^^^^^^ -The backend is not directly involved in `Dask `__ +The backend is not directly involved in `Dask `__ chunking, since it is internally managed by Xarray. However, the backend can define the preferred chunk size inside the variable’s encoding ``var.encoding["preferred_chunks"]``. The ``preferred_chunks`` may be useful diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 082d7984f59..f8bffa6e82f 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -5,7 +5,7 @@ Zarr Encoding Specification ============================ -In implementing support for the `Zarr `_ storage +In implementing support for the `Zarr `_ storage format, Xarray developers made some *ad hoc* choices about how to store NetCDF data in Zarr. Future versions of the Zarr spec will likely include a more formal convention @@ -14,7 +14,7 @@ for the storage of the NetCDF data model in Zarr; see discussion. First, Xarray can only read and write Zarr groups. There is currently no support -for reading / writting individual Zarr arrays. Zarr groups are mapped to +for reading / writing individual Zarr arrays. Zarr groups are mapped to Xarray ``Dataset`` objects. Second, from Xarray's point of view, the key difference between @@ -63,3 +63,10 @@ re-open it directly with Zarr: print(os.listdir("rasm.zarr")) print(zgroup.tree()) dict(zgroup["Tair"].attrs) + +.. ipython:: python + :suppress: + + import shutil + + shutil.rmtree("rasm.zarr") diff --git a/doc/roadmap.rst b/doc/roadmap.rst index b6ccb8d73db..d4098cfd35a 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -20,7 +20,7 @@ Why has xarray been successful? In our opinion: - The dominant use-case for xarray is for analysis of gridded dataset in the geosciences, e.g., as part of the - `Pangeo `__ project. + `Pangeo `__ project. - Xarray is also used more broadly in the physical sciences, where we've found the needs for analyzing multidimensional datasets are remarkably consistent (e.g., see @@ -112,7 +112,7 @@ A cleaner model would be to elevate ``indexes`` to an explicit part of xarray's data model, e.g., as attributes on the ``Dataset`` and ``DataArray`` classes. Indexes would need to be propagated along with coordinates in xarray operations, but will no longer would need to have -a one-to-one correspondance with coordinate variables. Instead, an index +a one-to-one correspondence with coordinate variables. Instead, an index should be able to refer to multiple (possibly multidimensional) coordinates that define it. See `GH 1603 `__ for full details diff --git a/doc/tutorials-and-videos.rst b/doc/tutorials-and-videos.rst index 0a266c4f4a7..6a9602bcfa6 100644 --- a/doc/tutorials-and-videos.rst +++ b/doc/tutorials-and-videos.rst @@ -62,8 +62,8 @@ Books, Chapters and Articles .. _Xarray's Tutorials: https://xarray-contrib.github.io/xarray-tutorial/ -.. _Journal of Open Research Software paper: http://doi.org/10.5334/jors.148 +.. _Journal of Open Research Software paper: https://doi.org/10.5334/jors.148 .. _UW eScience Institute's Geohackweek : https://geohackweek.github.io/nDarrays/ .. _tutorial: https://github.com/Unidata/unidata-users-workshop/blob/master/notebooks/xray-tutorial.ipynb .. _with answers: https://github.com/Unidata/unidata-users-workshop/blob/master/notebooks/xray-tutorial-with-answers.ipynb -.. _Nicolas Fauchereau's 2015 tutorial: http://nbviewer.iPython.org/github/nicolasfauchereau/metocean/blob/master/notebooks/xray.ipynb +.. 
_Nicolas Fauchereau's 2015 tutorial: https://nbviewer.iPython.org/github/nicolasfauchereau/metocean/blob/master/notebooks/xray.ipynb diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index 1c9ea4df6f5..de2afa9060c 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -38,7 +38,7 @@ numpy) over all array values: You can also use any of numpy's or scipy's many `ufunc`__ functions directly on a DataArray: -__ http://docs.scipy.org/doc/numpy/reference/ufuncs.html +__ https://numpy.org/doc/stable/reference/ufuncs.html .. ipython:: python @@ -202,7 +202,7 @@ From version 0.17, xarray supports multidimensional rolling, Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects with 1d-rolling. -.. _bottleneck: https://github.com/pydata/bottleneck/ +.. _bottleneck: https://github.com/pydata/bottleneck We can also manually iterate through ``Rolling`` objects: @@ -218,7 +218,7 @@ While ``rolling`` provides a simple moving average, ``DataArray`` also supports an exponential moving average with :py:meth:`~xarray.DataArray.rolling_exp`. This is similar to pandas' ``ewm`` method. numbagg_ is required. -.. _numbagg: https://github.com/shoyer/numbagg +.. _numbagg: https://github.com/numbagg/numbagg .. code:: python @@ -366,7 +366,7 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python x = np.linspace(0, 10, 300) - t = pd.date_range("15/12/1999", periods=364) + t = pd.date_range("1999-12-15", periods=364) da = xr.DataArray( np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), dims=["time", "x"], @@ -746,7 +746,7 @@ However, adding support for labels on both :py:class:`~xarray.Dataset` and To make this easier, xarray supplies the :py:func:`~xarray.apply_ufunc` helper function, designed for wrapping functions that support broadcasting and vectorization on unlabeled arrays in the style of a NumPy -`universal function `_ ("ufunc" for short). +`universal function `_ ("ufunc" for short). ``apply_ufunc`` takes care of everything needed for an idiomatic xarray wrapper, including alignment, broadcasting, looping over ``Dataset`` variables (if needed), and merging of coordinates. In fact, many internal xarray @@ -763,7 +763,7 @@ any additional arguments: For using more complex operations that consider some array values collectively, it's important to understand the idea of "core dimensions" from NumPy's -`generalized ufuncs `_. Core dimensions are defined as dimensions +`generalized ufuncs `_. Core dimensions are defined as dimensions that should *not* be broadcast over. Usually, they correspond to the fundamental dimensions over which an operation is defined, e.g., the summed axis in ``np.sum``. A good clue that core dimensions are needed is the presence of an diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index 4998cc68828..5110a970390 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -5,7 +5,7 @@ Parallel computing with Dask ============================ -Xarray integrates with `Dask `__ to support parallel +Xarray integrates with `Dask `__ to support parallel computations and streaming computation on datasets that don't fit into memory. Currently, Dask is an entirely optional feature for xarray. 
However, the benefits of using Dask are sufficiently strong that Dask may become a required @@ -16,7 +16,7 @@ For a full example of how to use xarray's Dask integration, read the may be found at the `Pangeo project's gallery `_ and at the `Dask examples website `_. -.. _blog post introducing xarray and Dask: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ +.. _blog post introducing xarray and Dask: https://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ What is a Dask array? --------------------- @@ -39,7 +39,7 @@ The actual computation is controlled by a multi-processing or thread pool, which allows Dask to take full advantage of multiple processors available on most modern computers. -For more details on Dask, read `its documentation `__. +For more details on Dask, read `its documentation `__. Note that xarray only makes use of ``dask.array`` and ``dask.delayed``. .. _dask.io: @@ -55,6 +55,8 @@ argument to :py:func:`~xarray.open_dataset` or using the .. ipython:: python :suppress: + import os + import numpy as np import pandas as pd import xarray as xr @@ -129,6 +131,11 @@ will return a ``dask.delayed`` object that can be computed later. with ProgressBar(): results = delayed_obj.compute() +.. ipython:: python + :suppress: + + os.remove("manipulated-example-data.nc") # Was not opened. + .. note:: When using Dask's distributed scheduler to write NETCDF4 files, @@ -147,13 +154,6 @@ A dataset can also be converted to a Dask DataFrame using :py:meth:`~xarray.Data Dask DataFrames do not support multi-indexes so the coordinate variables from the dataset are included as columns in the Dask DataFrame. -.. ipython:: python - :suppress: - - import os - - os.remove("example-data.nc") - os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -210,7 +210,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. ipython:: python - ds = ds.persist() + persisted = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -225,18 +225,13 @@ disk. .. note:: For more on the differences between :py:meth:`~xarray.Dataset.persist` and - :py:meth:`~xarray.Dataset.compute` see this `Stack Overflow answer `_ and the `Dask documentation `_. + :py:meth:`~xarray.Dataset.compute` see this `Stack Overflow answer `_ and the `Dask documentation `_. For performance you may wish to consider chunk sizes. The correct choice of chunk size depends both on your data and on the operations you want to perform. With xarray, both converting data to a Dask arrays and converting the chunk sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: -.. ipython:: python - :suppress: - - ds = ds.chunk({"time": 10}) - .. ipython:: python rechunked = ds.chunk({"latitude": 100, "longitude": 100}) @@ -508,6 +503,11 @@ Notice that the 0-shaped sizes were not printed to screen. Since ``template`` ha expected = ds + 10 + 10 mapped.identical(expected) +.. ipython:: python + :suppress: + + ds.close() # Closes "example-data.nc". + os.remove("example-data.nc") .. tip:: diff --git a/doc/user-guide/data-structures.rst b/doc/user-guide/data-structures.rst index 1322c51248d..e0fd4bd0d25 100644 --- a/doc/user-guide/data-structures.rst +++ b/doc/user-guide/data-structures.rst @@ -227,7 +227,7 @@ container of labeled arrays (:py:class:`~xarray.DataArray` objects) with aligned dimensions. 
It is designed as an in-memory representation of the data model from the `netCDF`__ file format. -__ http://www.unidata.ucar.edu/software/netcdf/ +__ https://www.unidata.ucar.edu/software/netcdf/ In addition to the dict-like interface of the dataset itself, which can be used to access any variable in a dataset, datasets have four key properties: @@ -247,7 +247,7 @@ distinction for indexing and computations. Coordinates indicate constant/fixed/independent quantities, unlike the varying/measured/dependent quantities that belong in data. -.. _CF conventions: http://cfconventions.org/ +.. _CF conventions: https://cfconventions.org/ Here is an example of how we might structure a dataset for a weather forecast: @@ -520,7 +520,7 @@ in xarray: "non-dimension coordinates" are called "auxiliary coordinate variables" (see :issue:`1295` for more details). -.. _CF terminology: http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#terminology +.. _CF terminology: https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#terminology Modifying coordinates @@ -628,4 +628,4 @@ it is recommended that you explicitly set the names of the levels. at which the forecast was made, rather than ``time`` which is the valid time for which the forecast applies. -__ http://en.wikipedia.org/wiki/Map_projection +__ https://en.wikipedia.org/wiki/Map_projection diff --git a/doc/user-guide/groupby.rst b/doc/user-guide/groupby.rst index 4c4f8d473ce..98f88a3d4ec 100644 --- a/doc/user-guide/groupby.rst +++ b/doc/user-guide/groupby.rst @@ -6,8 +6,8 @@ GroupBy: split-apply-combine Xarray supports `"group by"`__ operations with the same API as pandas to implement the `split-apply-combine`__ strategy: -__ http://pandas.pydata.org/pandas-docs/stable/groupby.html -__ http://www.jstatsoft.org/v40/i01/paper +__ https://pandas.pydata.org/pandas-docs/stable/groupby.html +__ https://www.jstatsoft.org/v40/i01/paper - Split your data into multiple independent groups. - Apply some function to each group. @@ -201,7 +201,7 @@ which is different from the logical grid dimensions (e.g. nx, ny). Such variables are valid under the `CF conventions`__. Xarray supports groupby operations over multidimensional coordinate variables: -__ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimensional_latitude_longitude_coordinate_variables +__ https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimensional_latitude_longitude_coordinate_variables .. ipython:: python diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst index 89f00466fa4..29b48bf7c47 100644 --- a/doc/user-guide/indexing.rst +++ b/doc/user-guide/indexing.rst @@ -97,7 +97,7 @@ including indexing with individual, slices and arrays of labels, as well as indexing with boolean arrays. Like pandas, label based indexing in xarray is *inclusive* of both the start and stop bounds. -__ http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-label +__ https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-label Setting values with label based indexing is also supported: @@ -145,7 +145,7 @@ Python :py:class:`slice` objects or 1-dimensional arrays. brackets, but unfortunately, Python `does yet not support`__ indexing with keyword arguments like ``da[space=0]`` -__ http://legacy.python.org/dev/peps/pep-0472/ +__ https://legacy.python.org/dev/peps/pep-0472/ .. _nearest neighbor lookups: @@ -373,7 +373,7 @@ indexing for xarray is based on our :ref:`broadcasting rules `. 
See :ref:`indexing.rules` for the complete specification. -.. _advanced indexing: https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.indexing.html +.. _advanced indexing: https://numpy.org/doc/stable/reference/arrays.indexing.html Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: @@ -503,7 +503,7 @@ This is because ``v[0] = v[0] - 1`` is called three times, rather than ``v[0] = v[0] - 1 - 1 - 1``. See `Assigning values to indexed arrays`__ for the details. -__ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-to-indexed-arrays +__ https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-indexed-arrays .. note:: @@ -751,7 +751,7 @@ Whether data is a copy or a view is more predictable in xarray than in pandas, s unlike pandas, xarray does not produce `SettingWithCopy warnings`_. However, you should still avoid assignment with chained indexing. -.. _SettingWithCopy warnings: http://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy +.. _SettingWithCopy warnings: https://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy .. _multi-level indexing: diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 16b8708231e..ddde0bf5888 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -11,6 +11,8 @@ format (recommended). .. ipython:: python :suppress: + import os + import numpy as np import pandas as pd import xarray as xr @@ -33,14 +35,14 @@ NetCDF is supported on almost all platforms, and parsers exist for the vast majority of scientific programming languages. Recent versions of netCDF are based on the even more widely used HDF5 file-format. -__ http://www.unidata.ucar.edu/software/netcdf/ +__ https://www.unidata.ucar.edu/software/netcdf/ .. tip:: If you aren't familiar with this data format, the `netCDF FAQ`_ is a good place to start. -.. _netCDF FAQ: http://www.unidata.ucar.edu/software/netcdf/docs/faq.html#What-Is-netCDF +.. _netCDF FAQ: https://www.unidata.ucar.edu/software/netcdf/docs/faq.html#What-Is-netCDF Reading and writing netCDF files with xarray requires scipy or the `netCDF4-Python`__ library to be installed (the latter is required to @@ -70,7 +72,7 @@ the ``format`` and ``engine`` arguments. .. tip:: - Using the `h5netcdf `_ package + Using the `h5netcdf `_ package by passing ``engine='h5netcdf'`` to :py:meth:`open_dataset` can sometimes be quicker than the default ``engine='netcdf4'`` that uses the `netCDF4 `_ package. @@ -84,6 +86,13 @@ We can load netCDF files to create a new Dataset using ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk +.. ipython:: python + :suppress: + + # Close "saved_on_disk.nc", but retain the file until after closing or deleting other + # datasets that will refer to it. + ds_disk.close() + Similarly, a DataArray can be saved to disk using the :py:meth:`DataArray.to_netcdf` method, and loaded from disk using the :py:func:`open_dataarray` function. As netCDF files @@ -204,11 +213,6 @@ You can view this encoding information (among others) in the Note that all operations that manipulate variables other than indexing will remove encoding information. -.. ipython:: python - :suppress: - - ds_disk.close() - .. _combining multiple files: @@ -255,7 +259,7 @@ See its docstring for more details. (``compat='override'``). -.. _dask: http://dask.pydata.org +.. _dask: http://dask.org .. 
_blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`open_mfdataset`. @@ -430,7 +434,7 @@ in the `documentation for createVariable`_ for netCDF4-Python. This only works for netCDF4 files and thus requires using ``format='netCDF4'`` and either ``engine='netcdf4'`` or ``engine='h5netcdf'``. -.. _documentation for createVariable: http://unidata.github.io/netcdf4-python/#netCDF4.Dataset.createVariable +.. _documentation for createVariable: https://unidata.github.io/netcdf4-python/#netCDF4.Dataset.createVariable Chunk based gzip compression can yield impressive space savings, especially for sparse data, but it comes with significant performance overhead. HDF5 @@ -484,13 +488,13 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back - xr.open_dataarray("complex.nc", engine="h5netcdf") + reopened = xr.open_dataarray("complex.nc", engine="h5netcdf") + reopened .. ipython:: python :suppress: - import os - + reopened.close() os.remove("complex.nc") .. warning:: @@ -498,596 +502,599 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: Note that this produces a file that is likely to be not readable by other netCDF libraries! -.. _io.iris: +.. _io.zarr: -Iris +Zarr ---- -The Iris_ tool allows easy reading of common meteorological and climate model formats -(including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very -similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is -installed, xarray can convert a ``DataArray`` into a ``Cube`` using -:py:meth:`DataArray.to_iris`: - -.. ipython:: python +`Zarr`_ is a Python package that provides an implementation of chunked, compressed, +N-dimensional arrays. +Zarr has the ability to store arrays in a range of ways, including in memory, +in files, and in cloud-based object storage such as `Amazon S3`_ and +`Google Cloud Storage`_. +Xarray's Zarr backend allows xarray to leverage these capabilities, including +the ability to store and analyze datasets far too large fit onto disk +(particularly :ref:`in combination with dask `). - da = xr.DataArray( - np.random.rand(4, 5), - dims=["x", "y"], - coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), - ) +Xarray can't open just any zarr dataset, because xarray requires special +metadata (attributes) describing the dataset dimensions and coordinates. +At this time, xarray can only open zarr datasets that have been written by +xarray. For implementation details, see :ref:`zarr_encoding`. - cube = da.to_iris() - cube +To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method. -Conversely, we can create a new ``DataArray`` object from a ``Cube`` using -:py:meth:`DataArray.from_iris`: +To write to a local directory, we pass a path to a directory: .. ipython:: python + :suppress: - da_cube = xr.DataArray.from_iris(cube) - da_cube + ! rm -rf path/to/directory.zarr +.. ipython:: python -.. _Iris: http://scitools.org.uk/iris + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") +(The suffix ``.zarr`` is optional--just a reminder that a zarr store lives +there.) If the directory does not exist, it will be created. 
If a zarr +store is already present at that path, an error will be raised, preventing it +from being overwritten. To override this behavior and overwrite an existing +store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`. -OPeNDAP -------- +To store variable length strings, convert them to object arrays first with +``dtype=object``. -Xarray includes support for `OPeNDAP`__ (via the netCDF4 library or Pydap), which -lets us access large datasets over HTTP. +To read back a zarr dataset that has been created this way, we use the +:py:func:`open_zarr` method: -__ http://www.opendap.org/ +.. ipython:: python -For example, we can open a connection to GBs of weather data produced by the -`PRISM`__ project, and hosted by `IRI`__ at Columbia: + ds_zarr = xr.open_zarr("path/to/directory.zarr") + ds_zarr -__ http://www.prism.oregonstate.edu/ -__ http://iri.columbia.edu/ +Cloud Storage Buckets +~~~~~~~~~~~~~~~~~~~~~ -.. ipython source code for this section - we don't use this to avoid hitting the DAP server on every doc build. +It is possible to read and write xarray datasets directly from / to cloud +storage buckets using zarr. This example uses the `gcsfs`_ package to provide +an interface to `Google Cloud Storage`_. - remote_data = xr.open_dataset( - 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - decode_times=False) - tmax = remote_data.tmax[:500, ::3, ::3] - tmax +From v0.16.2: general `fsspec`_ URLs are parsed and the store set up for you +automatically when reading, such that you can open a dataset in a single +call. You should include any arguments to the storage backend as the +key ``storage_options``, part of ``backend_kwargs``. - @savefig opendap-prism-tmax.png - tmax[0].plot() +.. code:: python -.. ipython:: - :verbatim: + ds_gcs = xr.open_dataset( + "gcs:///path.zarr", + backend_kwargs={ + "storage_options": {"project": "", "token": None} + }, + engine="zarr", + ) - In [3]: remote_data = xr.open_dataset( - ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", - ...: decode_times=False, - ...: ) - In [4]: remote_data - Out[4]: - - Dimensions: (T: 1422, X: 1405, Y: 621) - Coordinates: - * X (X) float32 -125.0 -124.958 -124.917 -124.875 -124.833 -124.792 -124.75 ... - * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 -772.5 -771.5 ... - * Y (Y) float32 49.9167 49.875 49.8333 49.7917 49.75 49.7083 49.6667 49.625 ... - Data variables: - ppt (T, Y, X) float64 ... - tdmean (T, Y, X) float64 ... - tmax (T, Y, X) float64 ... - tmin (T, Y, X) float64 ... - Attributes: - Conventions: IRIDL - expires: 1375315200 +This also works with ``open_mfdataset``, allowing you to pass a list of paths or +a URL to be interpreted as a glob string. -.. TODO: update this example to show off decode_cf? +For older versions, and for writing, you must explicitly set up a ``MutableMapping`` +instance and pass this, as follows: -.. note:: +.. code:: python - Like many real-world datasets, this dataset does not entirely follow - `CF conventions`_. Unexpected formats will usually cause xarray's automatic - decoding to fail. The way to work around this is to either set - ``decode_cf=False`` in ``open_dataset`` to turn off all use of CF - conventions, or by only disabling the troublesome parser. - In this case, we set ``decode_times=False`` because the time axis here - provides the calendar attribute in a format that xarray does not expect - (the integer ``360`` instead of a string like ``'360_day'``). 
+ import gcsfs -We can select and slice this data any number of times, and nothing is loaded -over the network until we look at particular values: + fs = gcsfs.GCSFileSystem(project="", token=None) + gcsmap = gcsfs.mapping.GCSMap("", gcs=fs, check=True, create=False) + # write to the bucket + ds.to_zarr(store=gcsmap) + # read it back + ds_gcs = xr.open_zarr(gcsmap) -.. ipython:: - :verbatim: +(or use the utility function ``fsspec.get_mapper()``). - In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] +.. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ +.. _Zarr: https://zarr.readthedocs.io/ +.. _Amazon S3: https://aws.amazon.com/s3/ +.. _Google Cloud Storage: https://cloud.google.com/storage/ +.. _gcsfs: https://github.com/fsspec/gcsfs - In [5]: tmax - Out[5]: - - [48541500 values with dtype=float64] - Coordinates: - * Y (Y) float32 49.9167 49.7917 49.6667 49.5417 49.4167 49.2917 ... - * X (X) float32 -125.0 -124.875 -124.75 -124.625 -124.5 -124.375 ... - * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 ... - Attributes: - pointwidth: 120 - standard_name: air_temperature - units: Celsius_scale - expires: 1443657600 +Zarr Compressors and Filters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # the data is downloaded automatically when we make the plot - In [6]: tmax[0].plot() +There are many different options for compression and filtering possible with +zarr. These are described in the +`zarr documentation `_. +These options can be passed to the ``to_zarr`` method as variable encoding. +For example: -.. image:: ../_static/opendap-prism-tmax.png +.. ipython:: python + :suppress: -Some servers require authentication before we can access the data. For this -purpose we can explicitly create a :py:class:`backends.PydapDataStore` -and pass in a `Requests`__ session object. For example for -HTTP Basic authentication:: + ! rm -rf foo.zarr - import xarray as xr - import requests +.. ipython:: python - session = requests.Session() - session.auth = ('username', 'password') + import zarr - store = xr.backends.PydapDataStore.open('http://example.com/data', - session=session) - ds = xr.open_dataset(store) + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) -`Pydap's cas module`__ has functions that generate custom sessions for -servers that use CAS single sign-on. For example, to connect to servers -that require NASA's URS authentication:: +.. note:: - import xarray as xr - from pydata.cas.urs import setup_session + Not all native zarr compression and filtering options have been tested with + xarray. - ds_url = 'https://gpm1.gesdisc.eosdis.nasa.gov/opendap/hyrax/example.nc' +.. _io.zarr.consolidated_metadata: - session = setup_session('username', 'password', check_url=ds_url) - store = xr.backends.PydapDataStore.open(ds_url, session=session) +Consolidated Metadata +~~~~~~~~~~~~~~~~~~~~~ - ds = xr.open_dataset(store) +Xarray needs to read all of the zarr metadata when it opens a dataset. +In some storage mediums, such as with cloud object storage (e.g. amazon S3), +this can introduce significant overhead, because two separate HTTP calls to the +object store must be made for each variable in the dataset. +As of xarray version 0.18, xarray by default uses a feature called +*consolidated metadata*, storing all metadata for the entire dataset with a +single key (by default called ``.zmetadata``). This typically drastically speeds +up opening the store. 
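+For example (a minimal sketch -- the store path is just a placeholder), the
+behaviour can also be requested explicitly on both the write and read side:
+
+.. code:: python
+
+    # Writing consolidated metadata is already the default; passing
+    # ``consolidated=True`` simply makes that choice explicit.
+    ds.to_zarr("path/to/directory.zarr", mode="w", consolidated=True)
+
+    # Require consolidated metadata when reading, raising an error instead of
+    # silently falling back to the slower non-consolidated code path.
+    ds_zarr = xr.open_zarr("path/to/directory.zarr", consolidated=True)
+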
(For more information on this feature, consult the +`zarr docs `_.) -__ http://docs.python-requests.org -__ http://pydap.readthedocs.io/en/latest/client.html#authentication +By default, xarray writes consolidated metadata and attempts to read stores +with consolidated metadata, falling back to use non-consolidated metadata for +reads. Because this fall-back option is so much slower, xarray issues a +``RuntimeWarning`` with guidance when reading with consolidated metadata fails: -.. _io.pickle: + Failed to open Zarr store with consolidated metadata, falling back to try + reading non-consolidated metadata. This is typically much slower for + opening a dataset. To silence this warning, consider: -Pickle ------- + 1. Consolidating metadata in this existing store with + :py:func:`zarr.consolidate_metadata`. + 2. Explicitly setting ``consolidated=False``, to avoid trying to read + consolidate metadata. + 3. Explicitly setting ``consolidated=True``, to raise an error in this case + instead of falling back to try reading non-consolidated metadata. -The simplest way to serialize an xarray object is to use Python's built-in pickle -module: +.. _io.zarr.appending: -.. ipython:: python +Appending to existing Zarr stores +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - import pickle +Xarray supports several ways of incrementally writing variables to a Zarr +store. These options are useful for scenarios when it is infeasible or +undesirable to write your entire dataset at once. - # use the highest protocol (-1) because it is way faster than the default - # text based pickle format - pkl = pickle.dumps(ds, protocol=-1) +.. tip:: - pickle.loads(pkl) + If you can load all of your data into a single ``Dataset`` using dask, a + single call to ``to_zarr()`` will write all of your data in parallel. -Pickling is important because it doesn't require any external libraries -and lets you use xarray objects with Python modules like -:py:mod:`multiprocessing` or :ref:`Dask `. However, pickling is -**not recommended for long-term storage**. - -Restoring a pickle requires that the internal structure of the types for the -pickled data remain unchanged. Because the internal design of xarray is still -being refined, we make no guarantees (at this point) that objects pickled with -this version of xarray will work in future versions. - -.. note:: - - When pickling an object opened from a NetCDF file, the pickle file will - contain a reference to the file on disk. If you want to store the actual - array values, load it into memory first with :py:meth:`Dataset.load` - or :py:meth:`Dataset.compute`. +.. warning:: -.. _dictionary io: + Alignment of coordinates is currently not checked when modifying an + existing Zarr store. It is up to the user to ensure that coordinates are + consistent. -Dictionary ----------- +To add or overwrite entire variables, simply call :py:meth:`~Dataset.to_zarr` +with ``mode='a'`` on a Dataset containing the new variables, passing in an +existing Zarr store or path to a Zarr store. -We can convert a ``Dataset`` (or a ``DataArray``) to a dict using -:py:meth:`Dataset.to_dict`: +To resize and then append values along an existing dimension in a store, set +``append_dim``. This is a good option if data always arives in a particular +order, e.g., for time-stepping a simulation: .. ipython:: python + :suppress: - d = ds.to_dict() - d - -We can create a new xarray object from a dict using -:py:meth:`Dataset.from_dict`: + ! rm -rf path/to/directory.zarr .. 
ipython:: python - ds_dict = xr.Dataset.from_dict(d) - ds_dict + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") -Dictionary support allows for flexible use of xarray objects. It doesn't -require external libraries and dicts can easily be pickled, or converted to -json, or geojson. All the values are converted to lists, so dicts might -be quite large. +Finally, you can use ``region`` to write to limited regions of existing arrays +in an existing Zarr store. This is a good option for writing data in parallel +from independent processes. -To export just the dataset schema without the data itself, use the -``data=False`` option: +To scale this up to writing large datasets, the first step is creating an +initial Zarr store without writing all of its array data. This can be done by +first creating a ``Dataset`` with dummy values stored in :ref:`dask `, +and then calling ``to_zarr`` with ``compute=False`` to write only metadata +(including ``attrs``) to Zarr: .. ipython:: python + :suppress: - ds.to_dict(data=False) - -This can be useful for generating indices of dataset contents to expose to -search indices or other automated data discovery tools. + ! rm -rf path/to/directory.zarr .. ipython:: python - :suppress: - - import os - os.remove("saved_on_disk.nc") + import dask.array -.. _io.rasterio: + # The values of this dask array are entirely irrelevant; only the dtype, + # shape and chunks are used + dummies = dask.array.zeros(30, chunks=10) + ds = xr.Dataset({"foo": ("x", dummies)}) + path = "path/to/directory.zarr" + # Now we write the metadata without computing any array values + ds.to_zarr(path, compute=False) -Rasterio --------- +Now, a Zarr store with the correct variable shapes and attributes exists that +can be filled out by subsequent calls to ``to_zarr``. The ``region`` provides a +mapping from dimension names to Python ``slice`` objects indicating where the +data should be written (in index space, not coordinate space), e.g., -GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if -rasterio is installed. Here is an example of how to use -:py:func:`open_rasterio` to read one of rasterio's `test files`_: +.. ipython:: python -.. deprecated:: 0.20.0 + # For convenience, we'll slice a single dataset, but in the real use-case + # we would create them separately possibly even from separate processes. + ds = xr.Dataset({"foo": ("x", np.arange(30))}) + ds.isel(x=slice(0, 10)).to_zarr(path, region={"x": slice(0, 10)}) + ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": slice(10, 20)}) + ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}) - Deprecated in favor of rioxarray. - For information about transitioning, see: - https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html +Concurrent writes with ``region`` are safe as long as they modify distinct +chunks in the underlying Zarr arrays (or use an appropriate ``lock``). -.. 
ipython:: - :verbatim: +As a safety check to make it harder to inadvertently override existing values, +if you set ``region`` then *all* variables included in a Dataset must have +dimensions included in ``region``. Other variables (typically coordinates) +need to be explicitly dropped and/or written in a separate calls to ``to_zarr`` +with ``mode='a'``. - In [7]: rio = xr.open_rasterio("RGB.byte.tif") +.. _io.iris: - In [8]: rio - Out[8]: - - [1703814 values with dtype=uint8] - Coordinates: - * band (band) int64 1 2 3 - * y (y) float64 2.827e+06 2.826e+06 2.826e+06 2.826e+06 2.826e+06 ... - * x (x) float64 1.021e+05 1.024e+05 1.027e+05 1.03e+05 1.033e+05 ... - Attributes: - res: (300.0379266750948, 300.041782729805) - transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805, 28... - is_tiled: 0 - crs: +init=epsg:32618 +Iris +---- +The Iris_ tool allows easy reading of common meteorological and climate model formats +(including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very +similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is +installed, xarray can convert a ``DataArray`` into a ``Cube`` using +:py:meth:`DataArray.to_iris`: -The ``x`` and ``y`` coordinates are generated out of the file's metadata -(``bounds``, ``width``, ``height``), and they can be understood as cartesian -coordinates defined in the file's projection provided by the ``crs`` attribute. -``crs`` is a PROJ4 string which can be parsed by e.g. `pyproj`_ or rasterio. -See :ref:`/examples/visualization_gallery.ipynb#Parsing-rasterio-geocoordinates` -for an example of how to convert these to longitudes and latitudes. +.. ipython:: python + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) -Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other -GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF. -`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. + cube = da.to_iris() + cube -.. ipython:: - :verbatim: +Conversely, we can create a new ``DataArray`` object from a ``Cube`` using +:py:meth:`DataArray.from_iris`: - In [1]: import rioxarray +.. ipython:: python - In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") + da_cube = xr.DataArray.from_iris(cube) + da_cube - In [3]: rds - Out[3]: - - [1703814 values with dtype=uint8] - Coordinates: - * band (band) int64 1 2 3 - * y (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06 - * x (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05 - spatial_ref int64 0 - Attributes: - STATISTICS_MAXIMUM: 255 - STATISTICS_MEAN: 29.947726688477 - STATISTICS_MINIMUM: 0 - STATISTICS_STDDEV: 52.340921626611 - transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827... - _FillValue: 0.0 - scale_factor: 1.0 - add_offset: 0.0 - grid_mapping: spatial_ref - In [4]: rds.rio.crs - Out[4]: CRS.from_epsg(32618) +.. _Iris: https://scitools.org.uk/iris - In [5]: rds4326 = rds.rio.reproject("epsg:4326") - In [6]: rds4326.rio.crs - Out[6]: CRS.from_epsg(4326) +OPeNDAP +------- - In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") +Xarray includes support for `OPeNDAP`__ (via the netCDF4 library or Pydap), which +lets us access large datasets over HTTP. +__ https://www.opendap.org/ -.. _rasterio: https://rasterio.readthedocs.io/en/latest/ -.. _rioxarray: https://corteva.github.io/rioxarray/stable/ -.. 
_test files: https://github.com/mapbox/rasterio/blob/master/tests/data/RGB.byte.tif -.. _pyproj: https://github.com/pyproj4/pyproj +For example, we can open a connection to GBs of weather data produced by the +`PRISM`__ project, and hosted by `IRI`__ at Columbia: -.. _io.zarr: +__ https://www.prism.oregonstate.edu/ +__ https://iri.columbia.edu/ -Zarr ----- +.. ipython source code for this section + we don't use this to avoid hitting the DAP server on every doc build. -`Zarr`_ is a Python package that provides an implementation of chunked, compressed, -N-dimensional arrays. -Zarr has the ability to store arrays in a range of ways, including in memory, -in files, and in cloud-based object storage such as `Amazon S3`_ and -`Google Cloud Storage`_. -Xarray's Zarr backend allows xarray to leverage these capabilities, including -the ability to store and analyze datasets far too large fit onto disk -(particularly :ref:`in combination with dask `). + remote_data = xr.open_dataset( + 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', + decode_times=False) + tmax = remote_data.tmax[:500, ::3, ::3] + tmax -Xarray can't open just any zarr dataset, because xarray requires special -metadata (attributes) describing the dataset dimensions and coordinates. -At this time, xarray can only open zarr datasets that have been written by -xarray. For implementation details, see :ref:`zarr_encoding`. + @savefig opendap-prism-tmax.png + tmax[0].plot() -To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method. +.. ipython:: + :verbatim: -To write to a local directory, we pass a path to a directory: + In [3]: remote_data = xr.open_dataset( + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) -.. ipython:: python - :suppress: + In [4]: remote_data + Out[4]: + + Dimensions: (T: 1422, X: 1405, Y: 621) + Coordinates: + * X (X) float32 -125.0 -124.958 -124.917 -124.875 -124.833 -124.792 -124.75 ... + * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 -772.5 -771.5 ... + * Y (Y) float32 49.9167 49.875 49.8333 49.7917 49.75 49.7083 49.6667 49.625 ... + Data variables: + ppt (T, Y, X) float64 ... + tdmean (T, Y, X) float64 ... + tmax (T, Y, X) float64 ... + tmin (T, Y, X) float64 ... + Attributes: + Conventions: IRIDL + expires: 1375315200 - ! rm -rf path/to/directory.zarr +.. TODO: update this example to show off decode_cf? -.. ipython:: python +.. note:: - ds = xr.Dataset( - {"foo": (("x", "y"), np.random.rand(4, 5))}, - coords={ - "x": [10, 20, 30, 40], - "y": pd.date_range("2000-01-01", periods=5), - "z": ("x", list("abcd")), - }, - ) - ds.to_zarr("path/to/directory.zarr") - -(The suffix ``.zarr`` is optional--just a reminder that a zarr store lives -there.) If the directory does not exist, it will be created. If a zarr -store is already present at that path, an error will be raised, preventing it -from being overwritten. To override this behavior and overwrite an existing -store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`. + Like many real-world datasets, this dataset does not entirely follow + `CF conventions`_. Unexpected formats will usually cause xarray's automatic + decoding to fail. The way to work around this is to either set + ``decode_cf=False`` in ``open_dataset`` to turn off all use of CF + conventions, or by only disabling the troublesome parser. 
+ In this case, we set ``decode_times=False`` because the time axis here + provides the calendar attribute in a format that xarray does not expect + (the integer ``360`` instead of a string like ``'360_day'``). -To store variable length strings, convert them to object arrays first with -``dtype=object``. +We can select and slice this data any number of times, and nothing is loaded +over the network until we look at particular values: -To read back a zarr dataset that has been created this way, we use the -:py:func:`open_zarr` method: +.. ipython:: + :verbatim: -.. ipython:: python + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] - ds_zarr = xr.open_zarr("path/to/directory.zarr") - ds_zarr + In [5]: tmax + Out[5]: + + [48541500 values with dtype=float64] + Coordinates: + * Y (Y) float32 49.9167 49.7917 49.6667 49.5417 49.4167 49.2917 ... + * X (X) float32 -125.0 -124.875 -124.75 -124.625 -124.5 -124.375 ... + * T (T) float32 -779.5 -778.5 -777.5 -776.5 -775.5 -774.5 -773.5 ... + Attributes: + pointwidth: 120 + standard_name: air_temperature + units: Celsius_scale + expires: 1443657600 -Cloud Storage Buckets -~~~~~~~~~~~~~~~~~~~~~ + # the data is downloaded automatically when we make the plot + In [6]: tmax[0].plot() -It is possible to read and write xarray datasets directly from / to cloud -storage buckets using zarr. This example uses the `gcsfs`_ package to provide -an interface to `Google Cloud Storage`_. +.. image:: ../_static/opendap-prism-tmax.png -From v0.16.2: general `fsspec`_ URLs are parsed and the store set up for you -automatically when reading, such that you can open a dataset in a single -call. You should include any arguments to the storage backend as the -key ``storage_options``, part of ``backend_kwargs``. +Some servers require authentication before we can access the data. For this +purpose we can explicitly create a :py:class:`backends.PydapDataStore` +and pass in a `Requests`__ session object. For example for +HTTP Basic authentication:: -.. code:: python + import xarray as xr + import requests - ds_gcs = xr.open_dataset( - "gcs:///path.zarr", - backend_kwargs={ - "storage_options": {"project": "", "token": None} - }, - engine="zarr", - ) + session = requests.Session() + session.auth = ('username', 'password') + store = xr.backends.PydapDataStore.open('http://example.com/data', + session=session) + ds = xr.open_dataset(store) -This also works with ``open_mfdataset``, allowing you to pass a list of paths or -a URL to be interpreted as a glob string. +`Pydap's cas module`__ has functions that generate custom sessions for +servers that use CAS single sign-on. For example, to connect to servers +that require NASA's URS authentication:: -For older versions, and for writing, you must explicitly set up a ``MutableMapping`` -instance and pass this, as follows: + import xarray as xr + from pydata.cas.urs import setup_session -.. code:: python + ds_url = 'https://gpm1.gesdisc.eosdis.nasa.gov/opendap/hyrax/example.nc' - import gcsfs + session = setup_session('username', 'password', check_url=ds_url) + store = xr.backends.PydapDataStore.open(ds_url, session=session) - fs = gcsfs.GCSFileSystem(project="", token=None) - gcsmap = gcsfs.mapping.GCSMap("", gcs=fs, check=True, create=False) - # write to the bucket - ds.to_zarr(store=gcsmap) - # read it back - ds_gcs = xr.open_zarr(gcsmap) + ds = xr.open_dataset(store) -(or use the utility function ``fsspec.get_mapper()``). +__ https://docs.python-requests.org +__ https://www.pydap.org/en/latest/client.html#authentication -.. 
_fsspec: https://filesystem-spec.readthedocs.io/en/latest/ -.. _Zarr: http://zarr.readthedocs.io/ -.. _Amazon S3: https://aws.amazon.com/s3/ -.. _Google Cloud Storage: https://cloud.google.com/storage/ -.. _gcsfs: https://github.com/dask/gcsfs +.. _io.pickle: -Zarr Compressors and Filters -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Pickle +------ -There are many different options for compression and filtering possible with -zarr. These are described in the -`zarr documentation `_. -These options can be passed to the ``to_zarr`` method as variable encoding. -For example: +The simplest way to serialize an xarray object is to use Python's built-in pickle +module: .. ipython:: python - :suppress: - ! rm -rf foo.zarr + import pickle -.. ipython:: python + # use the highest protocol (-1) because it is way faster than the default + # text based pickle format + pkl = pickle.dumps(ds, protocol=-1) - import zarr + pickle.loads(pkl) - compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) - ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) +Pickling is important because it doesn't require any external libraries +and lets you use xarray objects with Python modules like +:py:mod:`multiprocessing` or :ref:`Dask `. However, pickling is +**not recommended for long-term storage**. + +Restoring a pickle requires that the internal structure of the types for the +pickled data remain unchanged. Because the internal design of xarray is still +being refined, we make no guarantees (at this point) that objects pickled with +this version of xarray will work in future versions. .. note:: - Not all native zarr compression and filtering options have been tested with - xarray. + When pickling an object opened from a NetCDF file, the pickle file will + contain a reference to the file on disk. If you want to store the actual + array values, load it into memory first with :py:meth:`Dataset.load` + or :py:meth:`Dataset.compute`. -.. _io.zarr.consolidated_metadata: +.. _dictionary io: -Consolidated Metadata -~~~~~~~~~~~~~~~~~~~~~ +Dictionary +---------- -Xarray needs to read all of the zarr metadata when it opens a dataset. -In some storage mediums, such as with cloud object storage (e.g. amazon S3), -this can introduce significant overhead, because two separate HTTP calls to the -object store must be made for each variable in the dataset. -As of xarray version 0.18, xarray by default uses a feature called -*consolidated metadata*, storing all metadata for the entire dataset with a -single key (by default called ``.zmetadata``). This typically drastically speeds -up opening the store. (For more information on this feature, consult the -`zarr docs `_.) +We can convert a ``Dataset`` (or a ``DataArray``) to a dict using +:py:meth:`Dataset.to_dict`: -By default, xarray writes consolidated metadata and attempts to read stores -with consolidated metadata, falling back to use non-consolidated metadata for -reads. Because this fall-back option is so much slower, xarray issues a -``RuntimeWarning`` with guidance when reading with consolidated metadata fails: +.. ipython:: python - Failed to open Zarr store with consolidated metadata, falling back to try - reading non-consolidated metadata. This is typically much slower for - opening a dataset. To silence this warning, consider: + d = ds.to_dict() + d - 1. Consolidating metadata in this existing store with - :py:func:`zarr.consolidate_metadata`. - 2. Explicitly setting ``consolidated=False``, to avoid trying to read - consolidate metadata. - 3. 
Explicitly setting ``consolidated=True``, to raise an error in this case - instead of falling back to try reading non-consolidated metadata. +We can create a new xarray object from a dict using +:py:meth:`Dataset.from_dict`: -.. _io.zarr.appending: +.. ipython:: python -Appending to existing Zarr stores -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ds_dict = xr.Dataset.from_dict(d) + ds_dict -Xarray supports several ways of incrementally writing variables to a Zarr -store. These options are useful for scenarios when it is infeasible or -undesirable to write your entire dataset at once. +Dictionary support allows for flexible use of xarray objects. It doesn't +require external libraries and dicts can easily be pickled, or converted to +json, or geojson. All the values are converted to lists, so dicts might +be quite large. -.. tip:: +To export just the dataset schema without the data itself, use the +``data=False`` option: - If you can load all of your data into a single ``Dataset`` using dask, a - single call to ``to_zarr()`` will write all of your data in parallel. +.. ipython:: python -.. warning:: + ds.to_dict(data=False) - Alignment of coordinates is currently not checked when modifying an - existing Zarr store. It is up to the user to ensure that coordinates are - consistent. +.. ipython:: python + :suppress: -To add or overwrite entire variables, simply call :py:meth:`~Dataset.to_zarr` -with ``mode='a'`` on a Dataset containing the new variables, passing in an -existing Zarr store or path to a Zarr store. + # We're now done with the dataset named `ds`. Although the `with` statement closed + # the dataset, displaying the unpickled pickle of `ds` re-opened "saved_on_disk.nc". + # However, `ds` (rather than the unpickled dataset) refers to the open file. Delete + # `ds` to close the file. + del ds + os.remove("saved_on_disk.nc") -To resize and then append values along an existing dimension in a store, set -``append_dim``. This is a good option if data always arives in a particular -order, e.g., for time-stepping a simulation: +This can be useful for generating indices of dataset contents to expose to +search indices or other automated data discovery tools. -.. ipython:: python - :suppress: +.. _io.rasterio: - ! rm -rf path/to/directory.zarr +Rasterio +-------- -.. ipython:: python +GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if +rasterio is installed. Here is an example of how to use +:py:func:`open_rasterio` to read one of rasterio's `test files`_: - ds1 = xr.Dataset( - {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, - coords={ - "x": [10, 20, 30, 40], - "y": [1, 2, 3, 4, 5], - "t": pd.date_range("2001-01-01", periods=2), - }, - ) - ds1.to_zarr("path/to/directory.zarr") - ds2 = xr.Dataset( - {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, - coords={ - "x": [10, 20, 30, 40], - "y": [1, 2, 3, 4, 5], - "t": pd.date_range("2001-01-03", periods=2), - }, - ) - ds2.to_zarr("path/to/directory.zarr", append_dim="t") +.. deprecated:: 0.20.0 -Finally, you can use ``region`` to write to limited regions of existing arrays -in an existing Zarr store. This is a good option for writing data in parallel -from independent processes. + Deprecated in favor of rioxarray. + For information about transitioning, see: + https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html -To scale this up to writing large datasets, the first step is creating an -initial Zarr store without writing all of its array data. 
This can be done by -first creating a ``Dataset`` with dummy values stored in :ref:`dask `, -and then calling ``to_zarr`` with ``compute=False`` to write only metadata -(including ``attrs``) to Zarr: +.. ipython:: + :verbatim: -.. ipython:: python - :suppress: + In [7]: rio = xr.open_rasterio("RGB.byte.tif") - ! rm -rf path/to/directory.zarr + In [8]: rio + Out[8]: + + [1703814 values with dtype=uint8] + Coordinates: + * band (band) int64 1 2 3 + * y (y) float64 2.827e+06 2.826e+06 2.826e+06 2.826e+06 2.826e+06 ... + * x (x) float64 1.021e+05 1.024e+05 1.027e+05 1.03e+05 1.033e+05 ... + Attributes: + res: (300.0379266750948, 300.041782729805) + transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805, 28... + is_tiled: 0 + crs: +init=epsg:32618 -.. ipython:: python - import dask.array +The ``x`` and ``y`` coordinates are generated out of the file's metadata +(``bounds``, ``width``, ``height``), and they can be understood as cartesian +coordinates defined in the file's projection provided by the ``crs`` attribute. +``crs`` is a PROJ4 string which can be parsed by e.g. `pyproj`_ or rasterio. +See :ref:`/examples/visualization_gallery.ipynb#Parsing-rasterio-geocoordinates` +for an example of how to convert these to longitudes and latitudes. - # The values of this dask array are entirely irrelevant; only the dtype, - # shape and chunks are used - dummies = dask.array.zeros(30, chunks=10) - ds = xr.Dataset({"foo": ("x", dummies)}) - path = "path/to/directory.zarr" - # Now we write the metadata without computing any array values - ds.to_zarr(path, compute=False) -Now, a Zarr store with the correct variable shapes and attributes exists that -can be filled out by subsequent calls to ``to_zarr``. The ``region`` provides a -mapping from dimension names to Python ``slice`` objects indicating where the -data should be written (in index space, not coordinate space), e.g., +Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other +GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF. +`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. -.. ipython:: python +.. ipython:: + :verbatim: - # For convenience, we'll slice a single dataset, but in the real use-case - # we would create them separately possibly even from separate processes. - ds = xr.Dataset({"foo": ("x", np.arange(30))}) - ds.isel(x=slice(0, 10)).to_zarr(path, region={"x": slice(0, 10)}) - ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": slice(10, 20)}) - ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}) + In [1]: import rioxarray -Concurrent writes with ``region`` are safe as long as they modify distinct -chunks in the underlying Zarr arrays (or use an appropriate ``lock``). + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") -As a safety check to make it harder to inadvertently override existing values, -if you set ``region`` then *all* variables included in a Dataset must have -dimensions included in ``region``. Other variables (typically coordinates) -need to be explicitly dropped and/or written in a separate calls to ``to_zarr`` -with ``mode='a'``. + In [3]: rds + Out[3]: + + [1703814 values with dtype=uint8] + Coordinates: + * band (band) int64 1 2 3 + * y (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06 + * x (x) float64 1.021e+05 1.024e+05 ... 
3.389e+05 3.392e+05 + spatial_ref int64 0 + Attributes: + STATISTICS_MAXIMUM: 255 + STATISTICS_MEAN: 29.947726688477 + STATISTICS_MINIMUM: 0 + STATISTICS_STDDEV: 52.340921626611 + transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827... + _FillValue: 0.0 + scale_factor: 1.0 + add_offset: 0.0 + grid_mapping: spatial_ref + + In [4]: rds.rio.crs + Out[4]: CRS.from_epsg(32618) + + In [5]: rds4326 = rds.rio.reproject("epsg:4326") + + In [6]: rds4326.rio.crs + Out[6]: CRS.from_epsg(4326) + + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") + + +.. _rasterio: https://rasterio.readthedocs.io/en/latest/ +.. _rioxarray: https://corteva.github.io/rioxarray/stable/ +.. _test files: https://github.com/rasterio/rasterio/blob/master/tests/data/RGB.byte.tif +.. _pyproj: https://github.com/pyproj4/pyproj .. _io.cfgrib: @@ -1156,7 +1163,7 @@ To use PseudoNetCDF to read such files, supply Add ``backend_kwargs={'format': ''}`` where `` options are listed on the PseudoNetCDF page. -.. _PseudoNetCDF: http://github.com/barronh/PseudoNetCDF +.. _PseudoNetCDF: https://github.com/barronh/PseudoNetCDF CSV and other formats supported by pandas diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst index acf1d16b6ee..a376b0a5cb8 100644 --- a/doc/user-guide/pandas.rst +++ b/doc/user-guide/pandas.rst @@ -11,8 +11,8 @@ ecosystem. For example, for plotting labeled data, we highly recommend using the visualization `built in to pandas itself`__ or provided by the pandas aware libraries such as `Seaborn`__. -__ http://pandas.pydata.org/pandas-docs/stable/visualization.html -__ http://seaborn.pydata.org/ +__ https://pandas.pydata.org/pandas-docs/stable/visualization.html +__ https://seaborn.pydata.org/ .. ipython:: python :suppress: @@ -32,7 +32,7 @@ Tabular data is easiest to work with when it meets the criteria for * Each column holds a different variable. * Each rows holds a different observation. -__ http://www.jstatsoft.org/v59/i10/ +__ https://www.jstatsoft.org/v59/i10/ In this "tidy data" format, we can represent any :py:class:`Dataset` and :py:class:`DataArray` in terms of :py:class:`~pandas.DataFrame` and @@ -241,5 +241,5 @@ While the xarray docs are relatively complete, a few items stand out for Panel u While xarray may take some getting used to, it's worth it! If anything is unclear, please post an issue on `GitHub `__ or -`StackOverflow `__, +`StackOverflow `__, and we'll endeavor to respond to the specific case or improve the general docs. diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index 1dce65b191c..d81ba30f12f 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -20,7 +20,7 @@ nicely into a pandas DataFrame then you're better off using one of the more developed tools there. Xarray plotting functionality is a thin wrapper around the popular -`matplotlib `_ library. +`matplotlib `_ library. Matplotlib syntax and function names were copied as much as possible, which makes for an easy transition between the two. Matplotlib must be installed before xarray can plot. @@ -32,11 +32,11 @@ needs to be installed. For more extensive plotting applications consider the following projects: -- `Seaborn `_: "provides +- `Seaborn `_: "provides a high-level interface for drawing attractive statistical graphics." Integrates well with pandas. -- `HoloViews `_ +- `HoloViews `_ and `GeoViews `_: "Composable, declarative data structures for building even complex visualizations easily." Includes native support for xarray objects. 
@@ -45,7 +45,7 @@ For more extensive plotting applications consider the following projects: dynamic plots (backed by ``Holoviews`` or ``Geoviews``) by adding a ``hvplot`` accessor to DataArrays. -- `Cartopy `_: Provides cartographic +- `Cartopy `_: Provides cartographic tools. Imports @@ -106,7 +106,7 @@ The simplest way to make a plot is to call the :py:func:`DataArray.plot()` metho @savefig plotting_1d_simple.png width=4in air1d.plot() -Xarray uses the coordinate name along with metadata ``attrs.long_name``, ``attrs.standard_name``, ``DataArray.name`` and ``attrs.units`` (if available) to label the axes. The names ``long_name``, ``standard_name`` and ``units`` are copied from the `CF-conventions spec `_. When choosing names, the order of precedence is ``long_name``, ``standard_name`` and finally ``DataArray.name``. The y-axis label in the above plot was constructed from the ``long_name`` and ``units`` attributes of ``air1d``. +Xarray uses the coordinate name along with metadata ``attrs.long_name``, ``attrs.standard_name``, ``DataArray.name`` and ``attrs.units`` (if available) to label the axes. The names ``long_name``, ``standard_name`` and ``units`` are copied from the `CF-conventions spec `_. When choosing names, the order of precedence is ``long_name``, ``standard_name`` and finally ``DataArray.name``. The y-axis label in the above plot was constructed from the ``long_name`` and ``units`` attributes of ``air1d``. .. ipython:: python @@ -123,7 +123,7 @@ matplotlib.pyplot.plot_ passing in the index and the array values as x and y, re So to make a line plot with blue triangles a matplotlib format string can be used: -.. _matplotlib.pyplot.plot: http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot +.. _matplotlib.pyplot.plot: https://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot .. ipython:: python :okwarning: @@ -563,7 +563,7 @@ You can also specify a list of discrete colors through the ``colors`` argument: @savefig plotting_custom_colors_levels.png width=4in air2d.plot(levels=[0, 12, 18, 30], colors=flatui) -Finally, if you have `Seaborn `_ +Finally, if you have `Seaborn `_ installed, you can also specify a seaborn color palette to the ``cmap`` argument. Note that ``levels`` *must* be specified with seaborn color palettes if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, @@ -687,7 +687,7 @@ The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.F that links a :py:class:`DataArray` to a matplotlib figure with a particular structure. This object can be used to control the behavior of the multiple plots. It borrows an API and code from `Seaborn's FacetGrid -`_. +`_. The structure is contained within the ``axes`` and ``name_dicts`` attributes, both 2d NumPy object arrays. @@ -1020,7 +1020,7 @@ You can however decide to infer the cell boundaries and use the yet. If you want to use these coordinates, you'll have to make the plots outside the xarray framework. -.. _cell boundaries: http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-boundaries +.. _cell boundaries: https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-boundaries One can also make line plots with multidimensional coordinates. In this case, ``hue`` must be a dimension name, not a coordinate name. 
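+For instance (a minimal sketch, assuming a ``DataArray`` named ``da`` with
+dimensions ``x`` and ``y`` and a two-dimensional coordinate ``lon``):
+
+.. code:: python
+
+    # One line is drawn per value of the *dimension* "y"; passing the name of
+    # the two-dimensional coordinate as ``hue`` would not work here.
+    da.plot.line(x="lon", hue="y")
+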
diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index 86dc5fbe51a..edfaaa49427 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -151,7 +151,7 @@ Stacking different variables together These stacking and unstacking operations are particularly useful for reshaping xarray objects for use in machine learning packages, such as `scikit-learn -`_, that usually require two-dimensional numpy +`_, that usually require two-dimensional numpy arrays as inputs. For datasets with only one variable, we only need ``stack`` and ``unstack``, but combining multiple variables in a :py:class:`xarray.Dataset` is more complicated. If the variables in the dataset diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst index 1813c125eed..f6b9d0bc35b 100644 --- a/doc/user-guide/time-series.rst +++ b/doc/user-guide/time-series.rst @@ -46,7 +46,7 @@ When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` attribute like ``'days since 2000-01-01'``). -.. _CF conventions: http://cfconventions.org +.. _CF conventions: https://cfconventions.org .. note:: @@ -101,7 +101,7 @@ You can also select a particular time by indexing with a ds.sel(time=datetime.time(12)) -For more details, read the pandas documentation and the section on `Indexing Using Datetime Components `_ (i.e. using the ``.dt`` acessor). +For more details, read the pandas documentation and the section on `Indexing Using Datetime Components `_ (i.e. using the ``.dt`` accessor). .. _dt_accessor: @@ -111,7 +111,7 @@ Datetime components Similar `to pandas`_, the components of datetime objects contained in a given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. -.. _to pandas: http://pandas.pydata.org/pandas-docs/stable/basics.html#basics-dt-accessors +.. _to pandas: https://pandas.pydata.org/pandas-docs/stable/basics.html#basics-dt-accessors .. ipython:: python @@ -128,7 +128,7 @@ Xarray also supports a notion of "virtual" or "derived" coordinates for "day", "hour", "minute", "second", "dayofyear", "week", "dayofweek", "weekday" and "quarter": -__ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components +__ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python @@ -150,7 +150,7 @@ You can use these shortcuts with both Datasets and DataArray coordinates. In addition, xarray supports rounding operations ``floor``, ``ceil``, and ``round``. These operations require that you supply a `rounding frequency as a string argument.`__ -__ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases +__ https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. ipython:: python @@ -200,7 +200,7 @@ For upsampling or downsampling temporal resolutions, xarray offers a offered by the pandas method of the same name. Resample uses essentially the same api as ``resample`` `in pandas`_. -.. _in pandas: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#up-and-downsampling +.. 
_in pandas: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#up-and-downsampling For example, we can downsample our dataset from hourly to 6-hourly: diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 893e7b50429..3c957978acf 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -12,7 +12,7 @@ Weather and climate data Xarray can leverage metadata that follows the `Climate and Forecast (CF) conventions`_ if present. Examples include automatic labelling of plots with descriptive names and units if proper metadata is present (see :ref:`plotting`) and support for non-standard calendars used in climate science through the ``cftime`` module (see :ref:`CFTimeIndex`). There are also a number of geosciences-focused projects that build on xarray (see :ref:`ecosystem`). -.. _Climate and Forecast (CF) conventions: http://cfconventions.org +.. _Climate and Forecast (CF) conventions: https://cfconventions.org .. _cf_variables: @@ -218,13 +218,15 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python da.to_netcdf("example-no-leap.nc") - xr.open_dataset("example-no-leap.nc") + reopened = xr.open_dataset("example-no-leap.nc") + reopened .. ipython:: python :suppress: import os + reopened.close() os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 73cc15b50ff..b22c6e4d858 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,25 +14,140 @@ What's New np.random.seed(123456) -.. _whats-new.0.X.Y+1: +.. _whats-new.2022.03.1: + +v2022.03.1 (unreleased) +----------------------- + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + +- Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and + ensure it skips missing values for float dtypes (consistent with other methods). This should + not change the behavior (:pull:`6303`). By `Mathias Hauser `_. + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + +.. _whats-new.2022.02.0: +.. _whats-new.2022.03.0: + +v2022.03.0 (2 March 2022) +------------------------- + +This release brings a number of small improvements, as well as a move to `calendar versioning `_ (:issue:`6176`). + +Many thanks to the 16 contributors to the v2022.02.0 release! + +Aaron Spring, Alan D. Snow, Anderson Banihirwe, crusaderky, Illviljan, Joe Hamman, Jonas Gliß, +Lukas Pilz, Martin Bergemann, Mathias Hauser, Maximilian Roos, Romain Caneill, Stan West, Stijn Van Hoey, +Tobias Kölling, and Tom Nicholas. + + +New Features +~~~~~~~~~~~~ + +- Enabled multiplying tick offsets by floats. Allows ``float`` ``n`` in + :py:meth:`CFTimeIndex.shift` if ``shift_freq`` is between ``Day`` + and ``Microsecond``. (:issue:`6134`, :pull:`6135`). + By `Aaron Spring `_. +- Enable providing more keyword arguments to the `pydap` backend when reading + OpenDAP datasets (:issue:`6274`). + By `Jonas Gliß `. +- Allow :py:meth:`DataArray.drop_duplicates` to drop duplicates along multiple dimensions at once, + and add :py:meth:`Dataset.drop_duplicates`. (:pull:`6307`) + By `Tom Nicholas `_. + +Breaking changes +~~~~~~~~~~~~~~~~ + +- Renamed the ``interpolation`` keyword of all ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) + to ``method`` for consistency with numpy v1.22.0 (:pull:`6108`). + By `Mathias Hauser `_. 
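+  As a purely illustrative before/after (the quantile value and method name
+  are arbitrary)::
+
+      da.quantile(0.5, interpolation="nearest")  # previously
+      da.quantile(0.5, method="nearest")  # now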
+ +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + +- Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size + can now be stored using `to_zarr()` (:pull:`6258`) By `Tobias Kölling `_. +- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann `_ and `Stan West `_. + +Documentation +~~~~~~~~~~~~~ + +- Delete files of datasets saved to disk while building the documentation and enable + building on Windows via `sphinx-build` (:pull:`6237`). + By `Stan West `_. + + +Internal Changes +~~~~~~~~~~~~~~~~ + + +.. _whats-new.0.21.1: + +v0.21.1 (31 January 2022) +------------------------- + +This is a bugfix release to resolve (:issue:`6216`, :pull:`6207`). + +Bug fixes +~~~~~~~~~ +- Add `packaging` as a dependency to Xarray (:issue:`6216`, :pull:`6207`). + By `Sebastian Weigand `_ and `Joe Hamman `_. + + +.. _whats-new.0.21.0: + +v0.21.0 (27 January 2022) +------------------------- + +Many thanks to the 20 contributors to the v0.21.0 release! + +Abel Aoun, Anderson Banihirwe, Ant Gib, Chris Roat, Cindy Chiao, +Deepak Cherian, Dominik Stańczak, Fabian Hofmann, Illviljan, Jody Klymak, Joseph +K Aicher, Mark Harfouche, Mathias Hauser, Matthew Roeschke, Maximilian Roos, +Michael Delgado, Pascal Bourgault, Pierre, Ray Bell, Romain Caneill, Tim Heap, +Tom Nicholas, Zeb Nicholls, joseph nowak, keewis. -v0.21.0 (unreleased) ---------------------- New Features ~~~~~~~~~~~~ - New top-level function :py:func:`cross`. (:issue:`3279`, :pull:`5365`). By `Jimmy Westling `_. - +- ``keep_attrs`` support for :py:func:`where` (:issue:`4141`, :issue:`4682`, :pull:`4687`). + By `Justus Magin `_. - Enable the limit option for dask array in the following methods :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` (:issue:`6112`) By `Joseph Nowak `_. + Breaking changes ~~~~~~~~~~~~~~~~ - Rely on matplotlib's default datetime converters instead of pandas' (:issue:`6102`, :pull:`6109`). By `Jimmy Westling `_. - Improve repr readability when there are a large number of dimensions in datasets or dataarrays by - wrapping the text once the maximum display width has been exceeded. (:issue: `5546`, :pull:`5662`) + wrapping the text once the maximum display width has been exceeded. (:issue:`5546`, :pull:`5662`) By `Jimmy Westling `_. @@ -57,13 +172,12 @@ Bug fixes - Fix applying function with non-xarray arguments using :py:func:`xr.map_blocks`. By `Cindy Chiao `_. -- `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). +- No longer raise an error for an all-nan-but-one argument to + :py:meth:`DataArray.interpolate_na` when using `method='nearest'` (:issue:`5994`, :pull:`6144`). + By `Michael Delgado `_. +- `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. - - -Documentation -~~~~~~~~~~~~~ - +- Determination of zarr chunks handles empty lists for encoding chunks or variable chunks that occurs in certain circumstances (:pull:`5526`). By `Chris Roat `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -74,6 +188,8 @@ Internal Changes - Removed internal checks for ``pd.Panel`` (:issue:`6145`). By `Matthew Roeschke `_. +- Add ``pyupgrade`` pre-commit hook (:pull:`6152`). + By `Maximilian Roos `_. .. _whats-new.0.20.2: @@ -121,7 +237,8 @@ Documentation By `Deepak Cherian `_, `Maximilian Roos `_, `Jimmy Westling `_ . 
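A small sketch of the ``keep_attrs`` support in ``xr.where`` listed above, assuming a toy array; with ``keep_attrs=True`` the result is expected to carry the attributes of the second argument::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(4.0), dims="x", attrs={"units": "m"})

    # Mask values <= 1 while keeping ``units`` on the result.
    masked = xr.where(da > 1, da, np.nan, keep_attrs=True)
    print(masked.attrs)  # expected: {'units': 'm'}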
- +- Add list-like possibility for tolerance parameter in the reindex functions. + By `Antoine Gibek `_, Internal Changes ~~~~~~~~~~~~~~~~ @@ -628,7 +745,7 @@ Breaking changes By `Alessandro Amici `_. - Functions that are identities for 0d data return the unchanged data if axis is empty. This ensures that Datasets where some variables do - not have the averaged dimensions are not accidentially changed + not have the averaged dimensions are not accidentally changed (:issue:`4885`, :pull:`5207`). By `David Schwörer `_. - :py:attr:`DataArray.coarsen` and :py:attr:`Dataset.coarsen` no longer support passing ``keep_attrs`` @@ -1341,7 +1458,7 @@ New Features Enhancements ~~~~~~~~~~~~ - Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` - We performs independant interpolation sequentially rather than interpolating in + We performs independent interpolation sequentially rather than interpolating in one large multidimensional space. (:issue:`2223`) By `Keisuke Fujii `_. - :py:meth:`DataArray.interp` now support interpolations over chunked dimensions (:pull:`4155`). By `Alexandre Poux `_. @@ -1873,7 +1990,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Fix leap year condition in `monthly means example `_. +- Fix leap year condition in `monthly means example `_. By `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample`, explicitly stating that a @@ -2212,7 +2329,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Created a `PR checklist `_ +- Created a `PR checklist `_ as a quick reference for tasks before creating a new PR or pushing new commits. By `Gregory Gundersen `_. @@ -2692,7 +2809,7 @@ Breaking changes - ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`. Call :py:meth:`Dataset.transpose` directly instead. - Iterating over a ``Dataset`` now includes only data variables, not coordinates. - Similarily, calling ``len`` and ``bool`` on a ``Dataset`` now + Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now includes only data variables. - ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks array data, not coordinates. @@ -3277,7 +3394,7 @@ Backwards incompatible changes simple: convert your objects explicitly into NumPy arrays before calling the ufunc (e.g., with ``.values``). -.. _ufunc methods: https://docs.scipy.org/doc/numpy/reference/ufuncs.html#methods +.. _ufunc methods: https://numpy.org/doc/stable/reference/ufuncs.html#methods Enhancements ~~~~~~~~~~~~ @@ -3830,7 +3947,7 @@ Bug fixes (:issue:`1606`). By `Joe Hamman `_. -- Fix bug when using ``pytest`` class decorators to skiping certain unittests. +- Fix bug when using ``pytest`` class decorators to skipping certain unittests. The previous behavior unintentionally causing additional tests to be skipped (:issue:`1531`). By `Joe Hamman `_. @@ -3969,7 +4086,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- A new `gallery `_ +- A new `gallery `_ allows to add interactive examples to the documentation. By `Fabien Maussion `_. @@ -4721,8 +4838,8 @@ scientists who work with actual x-rays are interested in using this project in their work. Thanks for your understanding and patience in this transition. 
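The list-like ``tolerance`` entry above can be sketched as follows, assuming (as in pandas) that one tolerance value is supplied per target label; the numbers are illustrative::

    import xarray as xr

    da = xr.DataArray([10.0, 20.0, 30.0], dims="x", coords={"x": [0.0, 1.0, 2.0]})

    # Per-label tolerances: 0.1 is within 0.2 of x=0.0, and 1.4 is within
    # 0.6 of x=1.0, so both targets find a nearest match.
    result = da.reindex(x=[0.1, 1.4], method="nearest", tolerance=[0.2, 0.6])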
You can now find our documentation and code repository at new URLs: -- http://xarray.pydata.org -- http://github.com/pydata/xarray/ +- https://docs.xarray.dev +- https://github.com/pydata/xarray/ To ease the transition, we have simultaneously released v0.7.0 of both ``xray`` and ``xarray`` on the Python Package Index. These packages are @@ -5060,7 +5177,7 @@ Enhancements .. ipython:: python ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) - ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds["distance"] = np.sqrt(ds.x**2 + ds.y**2) @savefig where_example.png width=4in height=4in ds.distance.where(ds.distance < 100).plot() @@ -5268,7 +5385,7 @@ Enhancements .. ipython:: python ds = xray.Dataset({"y": ("x", [1, 2, 3])}) - ds.assign(z=lambda ds: ds.y ** 2) + ds.assign(z=lambda ds: ds.y**2) ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or @@ -5578,7 +5695,7 @@ Bug fixes - Several bug fixes related to decoding time units from netCDF files (:issue:`316`, :issue:`330`). Thanks Stefan Pfenninger! - xray no longer requires ``decode_coords=False`` when reading datasets with - unparseable coordinate attributes (:issue:`308`). + unparsable coordinate attributes (:issue:`308`). - Fixed ``DataArray.loc`` indexing with ``...`` (:issue:`318`). - Fixed an edge case that resulting in an error when reindexing multi-dimensional variables (:issue:`315`). @@ -5601,9 +5718,9 @@ is supporting out-of-core operations in xray using Dask_, a part of the Blaze_ project. For a preview of using Dask with weather data, read `this blog post`_ by Matthew Rocklin. See :issue:`328` for more details. -.. _Dask: http://dask.pydata.org -.. _Blaze: http://blaze.pydata.org -.. _this blog post: http://matthewrocklin.com/blog/work/2015/02/13/Towards-OOC-Slicing-and-Stacking/ +.. _Dask: https://dask.org +.. _Blaze: https://blaze.pydata.org +.. _this blog post: https://matthewrocklin.com/blog/work/2015/02/13/Towards-OOC-Slicing-and-Stacking v0.3.2 (23 December, 2014) -------------------------- diff --git a/readthedocs.yml b/readthedocs.yml index 072a4b5110c..89266a10fc8 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,12 +1,10 @@ version: 2 - build: - image: latest - -conda: - environment: ci/requirements/doc.yml - + os: ubuntu-20.04 + tools: + python: mambaforge-4.10 sphinx: fail_on_warning: true - +conda: + environment: ci/requirements/doc.yml formats: [] diff --git a/requirements.txt b/requirements.txt index 729a3655125..37417908cf4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ # https://help.github.com/en/github/visualizing-repository-data-with-graphs/listing-the-packages-that-a-repository-depends-on numpy >= 1.18 +packaging >= 20.0 pandas >= 1.1 diff --git a/setup.cfg b/setup.cfg index 797165a9fcd..afa25325018 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,9 +51,9 @@ long_description = Learn more ---------- - - Documentation: ``_ - - Issue tracker: ``_ - - Source code: ``_ + - Documentation: ``_ + - Issue tracker: ``_ + - Source code: ``_ - SciPy2015 talk: ``_ url = https://github.com/pydata/xarray @@ -77,6 +77,7 @@ python_requires = >=3.8 install_requires = numpy >= 1.18 pandas >= 1.1 + packaging >= 20.0 [options.extras_require] io = diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0ca82555c8f..548b98048ba 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -834,8 +834,8 @@ def open_mfdataset( References ---------- - .. [1] http://xarray.pydata.org/en/stable/dask.html - .. 
[2] http://xarray.pydata.org/en/stable/dask.html#chunking-and-performance + .. [1] https://docs.xarray.dev/en/stable/dask.html + .. [2] https://docs.xarray.dev/en/stable/dask.html#chunking-and-performance """ if isinstance(paths, str): if is_remote_uri(paths) and engine == "zarr": diff --git a/xarray/backends/common.py b/xarray/backends/common.py index f33a9ab2814..ad92a6c5869 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -65,7 +65,7 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 except catch: if n == max_retries: raise - base_delay = initial_delay * 2 ** n + base_delay = initial_delay * 2**n next_delay = base_delay + np.random.randint(base_delay) msg = ( f"getitem failed, waiting {next_delay} ms before trying again " @@ -160,7 +160,7 @@ def sync(self, compute=True): import dask.array as da # TODO: consider wrapping targets with dask.delayed, if this makes - # for any discernable difference in perforance, e.g., + # for any discernible difference in perforance, e.g., # targets = [dask.delayed(t) for t in self.targets] delayed_store = da.store( diff --git a/xarray/backends/file_manager.py b/xarray/backends/file_manager.py index 47a4201539b..06be03e4e44 100644 --- a/xarray/backends/file_manager.py +++ b/xarray/backends/file_manager.py @@ -204,7 +204,7 @@ def _acquire_with_cache_info(self, needs_lock=True): kwargs["mode"] = self._mode file = self._opener(*self._args, **kwargs) if self._mode == "w": - # ensure file doesn't get overriden when opened again + # ensure file doesn't get overridden when opened again self._mode = "a" self._cache[self._key] = file return file, False diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index a52e539181f..70fc3a76266 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -81,7 +81,11 @@ def _read_attributes(h5netcdf_var): _extract_h5nc_encoding = functools.partial( - _extract_nc4_variable_encoding, lsd_okay=False, h5py_okay=True, backend="h5netcdf" + _extract_nc4_variable_encoding, + lsd_okay=False, + h5py_okay=True, + backend="h5netcdf", + unlimited_dims=None, ) @@ -159,13 +163,7 @@ def open( kwargs = {"invalid_netcdf": invalid_netcdf} if phony_dims is not None: - if Version(h5netcdf.__version__) >= Version("0.8.0"): - kwargs["phony_dims"] = phony_dims - else: - raise ValueError( - "h5netcdf backend keyword argument 'phony_dims' needs " - "h5netcdf >= 0.8.0." 
- ) + kwargs["phony_dims"] = phony_dims if Version(h5netcdf.__version__) >= Version("0.10.0") and Version( h5netcdf.core.h5py.__version__ ) >= Version("3.0.0"): @@ -237,12 +235,24 @@ def get_attrs(self): return FrozenDict(_read_attributes(self.ds)) def get_dimensions(self): - return self.ds.dimensions + if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"): + return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items()) + else: + return self.ds.dimensions def get_encoding(self): - return { - "unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None} - } + if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"): + return { + "unlimited_dims": { + k for k, v in self.ds.dimensions.items() if v.isunlimited() + } + } + else: + return { + "unlimited_dims": { + k for k, v in self.ds.dimensions.items() if v is None + } + } def set_dimension(self, name, length, is_unlimited=False): if is_unlimited: @@ -270,7 +280,7 @@ def prepare_variable( raise NotImplementedError( "h5netcdf does not yet support setting a fill value for " "variable-length strings " - "(https://github.com/shoyer/h5netcdf/issues/37). " + "(https://github.com/h5netcdf/h5netcdf/issues/37). " f"Either remove '_FillValue' from encoding on variable {name!r} " "or set {'dtype': 'S1'} in encoding to use the fixed width " "NC_CHAR type." diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 0a9ffcbda22..7444fbf11eb 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -1,18 +1,11 @@ import functools import inspect import itertools -import sys import warnings +from importlib.metadata import entry_points from .common import BACKEND_ENTRYPOINTS, BackendEntrypoint -if sys.version_info >= (3, 8): - from importlib.metadata import entry_points -else: - # if the fallback library is missing, we are doomed. - from importlib_metadata import entry_points - - STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"] @@ -133,23 +126,23 @@ def guess_engine(store_spec): f"backends {installed_engines}. Consider explicitly selecting one of the " "installed engines via the ``engine`` parameter, or installing " "additional IO dependencies, see:\n" - "http://xarray.pydata.org/en/stable/getting-started-guide/installing.html\n" - "http://xarray.pydata.org/en/stable/user-guide/io.html" + "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" + "https://docs.xarray.dev/en/stable/user-guide/io.html" ) else: error_msg = ( "xarray is unable to open this file because it has no currently " "installed IO backends. Xarray's read/write support requires " "installing optional IO dependencies, see:\n" - "http://xarray.pydata.org/en/stable/getting-started-guide/installing.html\n" - "http://xarray.pydata.org/en/stable/user-guide/io" + "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html\n" + "https://docs.xarray.dev/en/stable/user-guide/io" ) else: error_msg = ( "found the following matches with the input file in xarray's IO " f"backends: {compatible_engines}. 
But their dependencies may not be installed, see:\n" - "http://xarray.pydata.org/en/stable/user-guide/io.html \n" - "http://xarray.pydata.org/en/stable/getting-started-guide/installing.html" + "https://docs.xarray.dev/en/stable/user-guide/io.html \n" + "https://docs.xarray.dev/en/stable/getting-started-guide/installing.html" ) raise ValueError(error_msg) @@ -168,10 +161,8 @@ def get_backend(engine): backend = engine() else: raise TypeError( - ( - "engine must be a string or a subclass of " - f"xarray.backends.BackendEntrypoint: {engine}" - ) + "engine must be a string or a subclass of " + f"xarray.backends.BackendEntrypoint: {engine}" ) return backend diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index da178926dbe..a2ca7f0206c 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -105,7 +105,7 @@ class PseudoNetCDFBackendEntrypoint(BackendEntrypoint): available = has_pseudonetcdf # *args and **kwargs are not allowed in open_backend_dataset_ kwargs, - # unless the open_dataset_parameters are explicity defined like this: + # unless the open_dataset_parameters are explicitly defined like this: open_dataset_parameters = ( "filename_or_obj", "mask_and_scale", diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index ffaf3793928..a5a1430abf2 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -86,9 +86,40 @@ def __init__(self, ds): self.ds = ds @classmethod - def open(cls, url, session=None): + def open( + cls, + url, + application=None, + session=None, + output_grid=None, + timeout=None, + verify=None, + user_charset=None, + ): + + if output_grid is None: + output_grid = True + + if verify is None: + verify = True + + if timeout is None: + from pydap.lib import DEFAULT_TIMEOUT - ds = pydap.client.open_url(url, session=session) + timeout = DEFAULT_TIMEOUT + + if user_charset is None: + user_charset = "ascii" + + ds = pydap.client.open_url( + url=url, + application=application, + session=session, + output_grid=output_grid, + timeout=timeout, + verify=verify, + user_charset=user_charset, + ) return cls(ds) def open_store_variable(self, var): @@ -123,12 +154,22 @@ def open_dataset( drop_variables=None, use_cftime=None, decode_timedelta=None, + application=None, session=None, + output_grid=None, + timeout=None, + verify=None, + user_charset=None, ): store = PydapDataStore.open( - filename_or_obj, + url=filename_or_obj, + application=application, session=session, + output_grid=output_grid, + timeout=timeout, + verify=verify, + user_charset=user_charset, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 9600827a807..7f3791ffca2 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -189,7 +189,7 @@ def open_rasterio( >>> from affine import Affine >>> da = xr.open_rasterio( - ... "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/RGB.byte.tif" + ... "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/RGB.byte.tif" ... ) >>> da diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 8bd343869ff..97517818d07 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -84,7 +84,8 @@ def __getitem__(self, key): def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): """ - Given encoding chunks (possibly None) and variable chunks (possibly None) + Given encoding chunks (possibly None or []) and variable chunks + (possibly None or []). 
""" # zarr chunk spec: @@ -93,7 +94,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are no chunks in encoding and the variable data is a numpy # array, then we let zarr use its own heuristics to pick the chunks - if var_chunks is None and enc_chunks is None: + if not var_chunks and not enc_chunks: return None # if there are no chunks in encoding but there are dask chunks, we try to @@ -102,7 +103,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks # while dask chunks can be variable sized # http://dask.pydata.org/en/latest/array-design.html#chunks - if var_chunks and enc_chunks is None: + if var_chunks and not enc_chunks: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( "Zarr requires uniform chunk sizes except for final chunk. " @@ -145,7 +146,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # if there are chunks in encoding and the variable data is a numpy array, # we use the specified chunks - if var_chunks is None: + if not var_chunks: return enc_chunks_tuple # the hard case @@ -159,8 +160,6 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # threads if var_chunks and enc_chunks_tuple: for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks): - if len(dchunks) == 1: - continue for dchunk in dchunks[:-1]: if dchunk % zchunk: base_error = ( @@ -174,28 +173,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): + " Consider either rechunking using `chunk()`, deleting " "or modifying `encoding['chunks']`, or specify `safe_chunks=False`." ) - if dchunks[-1] > zchunk: - base_error = ( - "Final chunk of Zarr array must be the same size or " - "smaller than the first. " - f"Specified Zarr chunk encoding['chunks']={enc_chunks_tuple}, " - f"for variable named {name!r} " - f"but {dchunks} in the variable's Dask chunks {var_chunks} are " - "incompatible with this encoding. " - ) - if safe_chunks: - raise NotImplementedError( - base_error - + " Consider either rechunking using `chunk()`, deleting " - "or modifying `encoding['chunks']`, or specify `safe_chunks=False`." - ) return enc_chunks_tuple raise AssertionError("We should never get here. Function logic must be wrong.") def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): - # Zarr arrays do not have dimenions. To get around this problem, we add + # Zarr arrays do not have dimensions. To get around this problem, we add # an attribute that specifies the dimension. We have to hide this attribute # when we send the attributes to the user. # zarr_obj can be either a zarr group or zarr array diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 2db6d4e8097..a4e2870650d 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -39,11 +39,12 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from __future__ import annotations import re from datetime import datetime, timedelta from functools import partial -from typing import ClassVar, Optional +from typing import ClassVar import numpy as np import pandas as pd @@ -87,10 +88,10 @@ def get_date_type(calendar, use_cftime=True): class BaseCFTimeOffset: - _freq: ClassVar[Optional[str]] = None - _day_option: ClassVar[Optional[str]] = None + _freq: ClassVar[str | None] = None + _day_option: ClassVar[str | None] = None - def __init__(self, n=1): + def __init__(self, n: int = 1): if not isinstance(n, int): raise TypeError( "The provided multiple 'n' must be an integer. " @@ -122,6 +123,8 @@ def __sub__(self, other): return NotImplemented def __mul__(self, other): + if not isinstance(other, int): + return NotImplemented return type(self)(n=other * self.n) def __neg__(self): @@ -160,7 +163,7 @@ def rollback(self, date): return date - type(self)() def __str__(self): - return "<{}: n={}>".format(type(self).__name__, self.n) + return f"<{type(self).__name__}: n={self.n}>" def __repr__(self): return str(self) @@ -171,6 +174,40 @@ def _get_offset_day(self, other): return _get_day_of_month(other, self._day_option) +class Tick(BaseCFTimeOffset): + # analogous https://github.com/pandas-dev/pandas/blob/ccb25ab1d24c4fb9691270706a59c8d319750870/pandas/_libs/tslibs/offsets.pyx#L806 + + def _next_higher_resolution(self): + self_type = type(self) + if self_type not in [Day, Hour, Minute, Second, Millisecond]: + raise ValueError("Could not convert to integer offset at any resolution") + if type(self) is Day: + return Hour(self.n * 24) + if type(self) is Hour: + return Minute(self.n * 60) + if type(self) is Minute: + return Second(self.n * 60) + if type(self) is Second: + return Millisecond(self.n * 1000) + if type(self) is Millisecond: + return Microsecond(self.n * 1000) + + def __mul__(self, other): + if not isinstance(other, (int, float)): + return NotImplemented + if isinstance(other, float): + n = other * self.n + # If the new `n` is an integer, we can represent it using the + # same BaseCFTimeOffset subclass as self, otherwise we need to move up + # to a higher-resolution subclass + if np.isclose(n % 1, 0): + return type(self)(int(n)) + + new_self = self._next_higher_resolution() + return new_self * other + return type(self)(n=other * self.n) + + def _get_day_of_month(other, day_option): """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's onOffset policy, as described by the `day_option` argument. 
@@ -396,13 +433,15 @@ def __sub__(self, other): return NotImplemented def __mul__(self, other): + if isinstance(other, float): + return NotImplemented return type(self)(n=other * self.n, month=self.month) def rule_code(self): - return "{}-{}".format(self._freq, _MONTH_ABBREVIATIONS[self.month]) + return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}" def __str__(self): - return "<{}: n={}, month={}>".format(type(self).__name__, self.n, self.month) + return f"<{type(self).__name__}: n={self.n}, month={self.month}>" class QuarterBegin(QuarterOffset): @@ -482,13 +521,15 @@ def __sub__(self, other): return NotImplemented def __mul__(self, other): + if isinstance(other, float): + return NotImplemented return type(self)(n=other * self.n, month=self.month) def rule_code(self): - return "{}-{}".format(self._freq, _MONTH_ABBREVIATIONS[self.month]) + return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}" def __str__(self): - return "<{}: n={}, month={}>".format(type(self).__name__, self.n, self.month) + return f"<{type(self).__name__}: n={self.n}, month={self.month}>" class YearBegin(YearOffset): @@ -541,7 +582,7 @@ def rollback(self, date): return date - YearEnd(month=self.month) -class Day(BaseCFTimeOffset): +class Day(Tick): _freq = "D" def as_timedelta(self): @@ -551,7 +592,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Hour(BaseCFTimeOffset): +class Hour(Tick): _freq = "H" def as_timedelta(self): @@ -561,7 +602,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Minute(BaseCFTimeOffset): +class Minute(Tick): _freq = "T" def as_timedelta(self): @@ -571,7 +612,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Second(BaseCFTimeOffset): +class Second(Tick): _freq = "S" def as_timedelta(self): @@ -581,7 +622,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Millisecond(BaseCFTimeOffset): +class Millisecond(Tick): _freq = "L" def as_timedelta(self): @@ -591,7 +632,7 @@ def __apply__(self, other): return other + self.as_timedelta() -class Microsecond(BaseCFTimeOffset): +class Microsecond(Tick): _freq = "U" def as_timedelta(self): @@ -671,7 +712,7 @@ def __apply__(self, other): _FREQUENCY_CONDITION = "|".join(_FREQUENCIES.keys()) -_PATTERN = fr"^((?P\d+)|())(?P({_FREQUENCY_CONDITION}))$" +_PATTERN = rf"^((?P\d+)|())(?P({_FREQUENCY_CONDITION}))$" # pandas defines these offsets as "Tick" objects, which for instance have @@ -741,7 +782,7 @@ def _generate_linear_range(start, end, periods): total_seconds = (end - start).total_seconds() values = np.linspace(0.0, total_seconds, periods, endpoint=True) - units = "seconds since {}".format(format_cftime_datetime(start)) + units = f"seconds since {format_cftime_datetime(start)}" calendar = start.calendar return cftime.num2date( values, units=units, calendar=calendar, only_use_cftime_datetimes=True diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 9bb8da1568b..d522d7910d4 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -38,11 +38,11 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
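With the ``Tick`` hierarchy above, sub-monthly offsets can be multiplied by floats: a non-integer multiple is promoted to the next finer resolution, which is what lets ``CFTimeIndex.shift`` accept float counts. A short sketch, assuming the optional ``cftime`` dependency is installed::

    import xarray as xr
    from xarray.coding.cftime_offsets import Day

    # 1.5 days cannot stay a Day offset, so it is promoted to Hour(36).
    print(Day() * 1.5)

    index = xr.cftime_range("2000", periods=1, freq="D")
    print(index.shift(1.5, "D"))  # expected: 2000-01-02 12:00:00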
+from __future__ import annotations import re import warnings from datetime import timedelta -from typing import Tuple, Type import numpy as np import pandas as pd @@ -66,7 +66,7 @@ REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END = 10 -OUT_OF_BOUNDS_TIMEDELTA_ERRORS: Tuple[Type[Exception], ...] +OUT_OF_BOUNDS_TIMEDELTA_ERRORS: tuple[type[Exception], ...] try: OUT_OF_BOUNDS_TIMEDELTA_ERRORS = (pd.errors.OutOfBoundsTimedelta, OverflowError) except AttributeError: @@ -310,7 +310,7 @@ class CFTimeIndex(pd.Index): ) date_type = property(get_date_type) - def __new__(cls, data, name=None): + def __new__(cls, data, name=None, **kwargs): assert_all_valid_date_type(data) if name is None and hasattr(data, "name"): name = data.name @@ -407,7 +407,7 @@ def _partial_date_slice(self, resolution, parsed): times = self._data - if self.is_monotonic: + if self.is_monotonic_increasing: if len(times) and ( (start < times[0] and end < times[0]) or (start > times[-1] and end > times[-1]) @@ -511,7 +511,7 @@ def contains(self, key): """Needed for .loc based partial-string indexing""" return self.__contains__(key) - def shift(self, n, freq): + def shift(self, n: int | float, freq: str | timedelta): """Shift the CFTimeIndex a multiple of the given frequency. See the documentation for :py:func:`~xarray.cftime_range` for a @@ -519,7 +519,7 @@ def shift(self, n, freq): Parameters ---------- - n : int + n : int, float if freq of days or below Periods to shift by freq : str or datetime.timedelta A frequency string or datetime.timedelta object to shift by @@ -537,18 +537,19 @@ def shift(self, n, freq): >>> index = xr.cftime_range("2000", periods=1, freq="M") >>> index CFTimeIndex([2000-01-31 00:00:00], - dtype='object', length=1, calendar='gregorian', freq=None) + dtype='object', length=1, calendar='standard', freq=None) >>> index.shift(1, "M") CFTimeIndex([2000-02-29 00:00:00], - dtype='object', length=1, calendar='gregorian', freq=None) + dtype='object', length=1, calendar='standard', freq=None) + >>> index.shift(1.5, "D") + CFTimeIndex([2000-02-01 12:00:00], + dtype='object', length=1, calendar='standard', freq=None) """ - from .cftime_offsets import to_offset - - if not isinstance(n, int): - raise TypeError(f"'n' must be an int, got {n}.") if isinstance(freq, timedelta): return self + n * freq elif isinstance(freq, str): + from .cftime_offsets import to_offset + return self + n * to_offset(freq) else: raise TypeError( @@ -626,7 +627,7 @@ def to_datetimeindex(self, unsafe=False): >>> times = xr.cftime_range("2000", periods=2, calendar="gregorian") >>> times CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], - dtype='object', length=2, calendar='gregorian', freq=None) + dtype='object', length=2, calendar='standard', freq=None) >>> times.to_datetimeindex() DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None) """ diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index aeffab0c2d7..e4b1e906160 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -18,7 +18,7 @@ def create_vlen_dtype(element_type): if element_type not in (str, bytes): - raise TypeError("unsupported type for vlen_dtype: {!r}".format(element_type)) + raise TypeError(f"unsupported type for vlen_dtype: {element_type!r}") # based on h5py.special_dtype return np.dtype("O", metadata={"element_type": element_type}) @@ -227,7 +227,7 @@ def shape(self): return self.array.shape[:-1] def __repr__(self): - return "{}({!r})".format(type(self).__name__, self.array) + return f"{type(self).__name__}({self.array!r})" def 
__getitem__(self, key): # require slicing the last dimension completely diff --git a/xarray/coding/times.py b/xarray/coding/times.py index c89b0c100cd..0eb8707f0cc 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -131,8 +131,8 @@ def _ensure_padded_year(ref_date): matches_start_digits = re.match(r"(\d+)(.*)", ref_date) if not matches_start_digits: raise ValueError(f"invalid reference date for time units: {ref_date}") - ref_year, everything_else = [s for s in matches_start_digits.groups()] - ref_date_padded = "{:04d}{}".format(int(ref_year), everything_else) + ref_year, everything_else = (s for s in matches_start_digits.groups()) + ref_date_padded = f"{int(ref_year):04d}{everything_else}" warning_msg = ( f"Ambiguous reference date string: {ref_date}. The first value is " @@ -155,7 +155,7 @@ def _unpack_netcdf_time_units(units): if not matches: raise ValueError(f"invalid time units: {units}") - delta_units, ref_date = [s.strip() for s in matches.groups()] + delta_units, ref_date = (s.strip() for s in matches.groups()) ref_date = _ensure_padded_year(ref_date) return delta_units, ref_date @@ -545,7 +545,7 @@ def _should_cftime_be_used(source, target_calendar, use_cftime): def _cleanup_netcdf_time_units(units): delta, ref_date = _unpack_netcdf_time_units(units) try: - units = "{} since {}".format(delta, format_timestamp(ref_date)) + units = f"{delta} since {format_timestamp(ref_date)}" except (OutOfBoundsDatetime, ValueError): # don't worry about reifying the units if they're out of bounds or # formatted badly diff --git a/xarray/convert.py b/xarray/convert.py index 0fbd1e13163..93b0a30e57b 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -235,7 +235,7 @@ def _iris_cell_methods_to_str(cell_methods_obj): def _name(iris_obj, default="unknown"): - """Mimicks `iris_obj.name()` but with different name resolution order. + """Mimics `iris_obj.name()` but with different name resolution order. Similar to iris_obj.name() method, but using iris_obj.var_name first to enable roundtripping. diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 58fca69b187..83aaa10a20c 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,43 +1,34 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union +from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union from . import duck_array_ops -from .options import OPTIONS -from .utils import contains_only_dask_or_numpy +from .types import T_DataArray, T_Dataset -if TYPE_CHECKING: - from .dataarray import DataArray - from .dataset import Dataset - -try: - import flox -except ImportError: - flox = None - - -class DatasetReductions: - __slots__ = () +class DatasetReduce(Protocol): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> "Dataset": - raise NotImplementedError() + ) -> T_Dataset: + ... + + +class DatasetGroupByReductions: + __slots__ = () def count( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``count`` along some dimension(s). 
@@ -45,7 +36,8 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -53,7 +45,6 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -61,14 +52,6 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.count - dask.array.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - Examples -------- >>> da = xr.DataArray( @@ -89,11 +72,20 @@ def count( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.count() + >>> ds.groupby("labels").count() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da int64 5 + da (labels) int64 1 2 2 + + See Also + -------- + numpy.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.count, @@ -104,11 +96,11 @@ def count( ) def all( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -116,7 +108,8 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -124,7 +117,6 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -132,14 +124,6 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. - Examples -------- >>> da = xr.DataArray( @@ -160,11 +144,20 @@ def all( Data variables: da (time) bool True True True True True False - >>> ds.all() + >>> ds.groupby("labels").all() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da bool False + da (labels) bool False True True + + See Also + -------- + numpy.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -175,11 +168,11 @@ def all( ) def any( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``any`` along some dimension(s). 
@@ -187,7 +180,8 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -195,7 +189,6 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -203,14 +196,6 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. - Examples -------- >>> da = xr.DataArray( @@ -231,11 +216,20 @@ def any( Data variables: da (time) bool True True True True True False - >>> ds.any() + >>> ds.groupby("labels").any() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da bool True + da (labels) bool True True True + + See Also + -------- + numpy.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -246,12 +240,12 @@ def any( ) def max( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -259,11 +253,12 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -272,7 +267,6 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -280,14 +274,6 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - Examples -------- >>> da = xr.DataArray( @@ -308,19 +294,30 @@ def max( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.max() + >>> ds.groupby("labels").max() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 3.0 + da (labels) float64 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. 
- >>> ds.max(skipna=False) + >>> ds.groupby("labels").max(skipna=False) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 nan + da (labels) float64 nan 2.0 3.0 + + See Also + -------- + numpy.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -332,12 +329,12 @@ def max( ) def min( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -345,11 +342,12 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -358,7 +356,6 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -366,14 +363,6 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - Examples -------- >>> da = xr.DataArray( @@ -394,19 +383,30 @@ def min( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.min() + >>> ds.groupby("labels").min() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 1.0 + da (labels) float64 1.0 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.min(skipna=False) + >>> ds.groupby("labels").min(skipna=False) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 nan + da (labels) float64 nan 2.0 1.0 + + See Also + -------- + numpy.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -418,12 +418,12 @@ def min( ) def mean( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -431,11 +431,12 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -444,7 +445,6 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -452,18 +452,6 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -484,19 +472,30 @@ def mean( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.mean() + >>> ds.groupby("labels").mean() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 1.8 + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.mean(skipna=False) + >>> ds.groupby("labels").mean(skipna=False) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 nan + da (labels) float64 nan 2.0 2.0 + + See Also + -------- + numpy.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -508,13 +507,13 @@ def mean( ) def prod( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -522,11 +521,12 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -541,7 +541,6 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -549,18 +548,6 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. 
- Examples -------- >>> da = xr.DataArray( @@ -581,27 +568,40 @@ def prod( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.prod() + >>> ds.groupby("labels").prod() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 12.0 + da (labels) float64 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.prod(skipna=False) + >>> ds.groupby("labels").prod(skipna=False) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 nan + da (labels) float64 nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. - >>> ds.prod(skipna=True, min_count=2) + >>> ds.groupby("labels").prod(skipna=True, min_count=2) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 12.0 + da (labels) float64 nan 4.0 3.0 + + See Also + -------- + numpy.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -614,13 +614,13 @@ def prod( ) def sum( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -628,11 +628,12 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -647,7 +648,6 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -655,18 +655,6 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -687,27 +675,40 @@ def sum( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.sum() + >>> ds.groupby("labels").sum() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 9.0 + da (labels) float64 1.0 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.sum(skipna=False) + >>> ds.groupby("labels").sum(skipna=False) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 nan + da (labels) float64 nan 4.0 4.0 Specify ``min_count`` for finer control over when NaNs are ignored. 
- >>> ds.sum(skipna=True, min_count=2) + >>> ds.groupby("labels").sum(skipna=True, min_count=2) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 9.0 + da (labels) float64 nan 4.0 4.0 + + See Also + -------- + numpy.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -720,13 +721,12 @@ def sum( ) def std( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -734,15 +734,13 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -750,7 +748,6 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -758,18 +755,6 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -790,46 +775,47 @@ def std( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.std() + >>> ds.groupby("labels").std() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 0.7483 + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.std(skipna=False) + >>> ds.groupby("labels").std(skipna=False) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 nan - - Specify ``ddof=1`` for an unbiased estimate. + da (labels) float64 nan 0.0 1.0 - >>> ds.std(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.8367 + See Also + -------- + numpy.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. 
""" return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, - ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def var( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -837,15 +823,13 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -853,7 +837,6 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -861,18 +844,6 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -893,45 +864,47 @@ def var( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.var() + >>> ds.groupby("labels").var() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 0.56 + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.var(skipna=False) + >>> ds.groupby("labels").var(skipna=False) - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 nan - - Specify ``ddof=1`` for an unbiased estimate. + da (labels) float64 nan 0.0 1.0 - >>> ds.var(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.7 + See Also + -------- + numpy.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, - ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def median( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -939,11 +912,12 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. 
If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -952,7 +926,6 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -960,18 +933,6 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -992,2101 +953,30 @@ def median( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.median() + >>> ds.groupby("labels").median() - Dimensions: () + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: - da float64 2.0 + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.median(skipna=False) - - Dimensions: () - Data variables: - da float64 nan - """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - - -class DataArrayReductions: - __slots__ = () - - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> "DataArray": - raise NotImplementedError() - - def count( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.count - dask.array.count - Dataset.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.count() - - array(5) - """ - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) - - def all( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.all() - - array(False) - """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) - - def any( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.any() - - array(True) - """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) - - def max( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.max() - - array(3.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.max(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def min( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
- - Returns - ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.min() - - array(1.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.min(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def mean( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.mean() - - array(1.8) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.mean(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def prod( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - min_count: Optional[int] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``prod``. For e.g. 
``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.prod() - - array(12.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.prod(skipna=False) - - array(nan) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.prod(skipna=True, min_count=2) - - array(12.) - """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) - - def sum( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - min_count: Optional[int] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. 
- keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.sum() - - array(9.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.sum(skipna=False) - - array(nan) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.sum(skipna=True, min_count=2) - - array(9.) - """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) - - def std( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.std() - - array(0.74833148) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.std(skipna=False) - - array(nan) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.std(skipna=True, ddof=1) - - array(0.83666003) - """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) - - def var( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.var() - - array(0.56) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.var(skipna=False) - - array(nan) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.var(skipna=True, ddof=1) - - array(0.7) - """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) - - def median( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "DataArray": - """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.median() - - array(2.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.median(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - -class DatasetGroupByReductions: - _obj: "Dataset" - - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> "Dataset": - raise NotImplementedError() - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> "Dataset": - raise NotImplementedError() - - def count( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.count - dask.array.count - Dataset.count - :ref:`groupby` - User guide on groupby operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").count() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) int64 1 2 2 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="count", - dim=dim, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.count, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) - - def all( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`groupby` - User guide on groupby operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").all() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool False True True - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="all", - dim=dim, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_all, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) - - def any( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. 
- These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`groupby` - User guide on groupby operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").any() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool True True True - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="any", - dim=dim, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_any, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) - - def max( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`groupby` - User guide on groupby operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").max() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 3.0 - - Use ``skipna`` to control whether NaNs are ignored. 
- - >>> ds.groupby("labels").max(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 3.0 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="max", - dim=dim, - skipna=skipna, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) - - def min( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`groupby` - User guide on groupby operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").min() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 1.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").min(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 1.0 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="min", - dim=dim, - skipna=skipna, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) - - def mean( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``mean`` along some dimension(s). 
- - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").mean() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").mean(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="mean", - dim=dim, - skipna=skipna, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - - def prod( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - min_count: Optional[int] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. 
Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").prod() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 4.0 3.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").prod(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> ds.groupby("labels").prod(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="prod", - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - - def sum( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - min_count: Optional[int] = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. 
- keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").sum() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 4.0 4.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").sum(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> ds.groupby("labels").sum(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="sum", - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - - def std( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. 
- These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").std() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").std(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 - - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.groupby("labels").std(skipna=True, ddof=1) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.414 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="std", - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - - def var( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. 
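The recurring Notes line ("Non-numeric variables will be removed prior to reducing") follows from the ``numeric_only=True`` flag these methods pass to ``reduce``. A small sketch with made-up variable names showing what that means for a mixed Dataset:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    dict(
        num=("x", np.array([1.0, 2.0, 4.0, 8.0])),
        txt=("x", np.array(["a", "b", "a", "b"])),
    ),
    coords=dict(labels=("x", ["lo", "lo", "hi", "hi"])),
)

# var/std/mean/... are generated with numeric_only=True, so the string
# variable "txt" is silently dropped from the grouped result.
out = ds.groupby("labels").var()
print(list(out.data_vars))  # ['num']
print(out["num"].values)    # hi -> var([4, 8]) = 4.0, lo -> var([1, 2]) = 0.25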
- - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").var() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").var(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 - - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.groupby("labels").var(skipna=True, ddof=1) + >>> ds.groupby("labels").median(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 0.0 2.0 - """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="var", - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - - def median( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - keep_attrs: bool = None, - **kwargs, - ) -> "Dataset": - """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed + da (labels) float64 nan 2.0 2.0 See Also -------- numpy.median - dask.array.median Dataset.median :ref:`groupby` User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.groupby("labels").median() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").median(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.median, @@ -3099,33 +989,14 @@ def median( class DatasetResampleReductions: - _obj: "Dataset" - - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> "Dataset": - raise NotImplementedError() - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> "Dataset": - raise NotImplementedError() + __slots__ = () def count( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -3133,7 +1004,8 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3141,7 +1013,6 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3149,14 +1020,6 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.count - dask.array.count - Dataset.count - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -3184,32 +1047,28 @@ def count( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) int64 1 3 1 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="count", - dim=dim, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.count, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def all( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -3217,7 +1076,8 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3225,7 +1085,6 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3233,14 +1092,6 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -3268,32 +1119,28 @@ def all( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True False + + See Also + -------- + numpy.all + Dataset.all + :ref:`resampling` + User guide on resampling operations. """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="all", - dim=dim, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_all, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def any( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -3301,7 +1148,8 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3309,7 +1157,6 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3317,14 +1164,6 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -3352,33 +1191,29 @@ def any( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True True - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="any", - dim=dim, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_any, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.any + Dataset.any + :ref:`resampling` + User guide on resampling operations. 
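A minimal usage sketch, assuming the ``ds`` constructed in the Examples block above: ``keep_attrs`` is forwarded unchanged to ``self.reduce``, so

    >>> out = ds.resample(time="3M").any(keep_attrs=True)

reduces with ``duck_array_ops.array_any`` over each 3-month bin (True wherever at least one value in the bin is truthy, as in the ``True True True`` result shown above) while preserving the original ``attrs`` on ``out``.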
+ """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def max( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -3386,11 +1221,12 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3399,7 +1235,6 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3407,14 +1242,6 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -3452,35 +1279,30 @@ def max( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 3.0 nan - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="max", - dim=dim, - skipna=skipna, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.max + Dataset.max + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def min( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -3488,11 +1310,12 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3501,7 +1324,6 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3509,14 +1331,6 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -3554,35 +1368,30 @@ def min( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 1.0 nan - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="min", - dim=dim, - skipna=skipna, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.min + Dataset.min + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -3590,11 +1399,12 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3603,7 +1413,6 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3611,18 +1420,6 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. 
- Examples -------- >>> da = xr.DataArray( @@ -3660,36 +1457,31 @@ def mean( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="mean", - dim=dim, - skipna=skipna, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.mean + Dataset.mean + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -3697,11 +1489,12 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3716,7 +1509,6 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3724,18 +1516,6 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -3783,38 +1563,32 @@ def prod( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="prod", - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.prod + Dataset.prod + :ref:`resampling` + User guide on resampling operations. 
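A hedged sketch reusing the ``ds`` from the Examples above: ``min_count`` is passed straight through to ``duck_array_ops.prod``, so

    >>> out = ds.resample(time="3M").prod(skipna=True, min_count=2)

leaves the first and last bins as NaN (the ``nan 6.0 nan`` result shown above) because each of them contains fewer than ``min_count`` valid values.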
+ """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -3822,11 +1596,12 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3841,7 +1616,6 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3849,18 +1623,6 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -3908,38 +1670,31 @@ def sum( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="sum", - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.sum + Dataset.sum + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def std( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -3947,15 +1702,13 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3963,7 +1716,6 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3971,18 +1723,6 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -4021,47 +1761,29 @@ def std( Data variables: da (time) float64 0.0 0.8165 nan - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.resample(time="3M").std(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan + See Also + -------- + numpy.std + Dataset.std + :ref:`resampling` + User guide on resampling operations. """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="std", - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def var( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -4069,15 +1791,13 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -4085,7 +1805,6 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4093,18 +1812,6 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -4143,46 +1850,29 @@ def var( Data variables: da (time) float64 0.0 0.6667 nan - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.resample(time="3M").var(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan + See Also + -------- + numpy.var + Dataset.var + :ref:`resampling` + User guide on resampling operations. """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="var", - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def median( - self, + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "Dataset": + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -4190,11 +1880,12 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -4203,7 +1894,6 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4211,18 +1901,6 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. 
- Examples -------- >>> da = xr.DataArray( @@ -4260,6 +1938,13 @@ def median( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan + + See Also + -------- + numpy.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.median, @@ -4271,34 +1956,28 @@ def median( ) -class DataArrayGroupByReductions: - _obj: "DataArray" - +class DataArrayReduce(Protocol): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> "DataArray": - raise NotImplementedError() + ) -> T_DataArray: + ... - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> "DataArray": - raise NotImplementedError() + +class DataArrayGroupByReductions: + __slots__ = () def count( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -4306,7 +1985,8 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -4314,7 +1994,6 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4322,14 +2001,6 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.count - dask.array.count - DataArray.count - :ref:`groupby` - User guide on groupby operations. - Examples -------- >>> da = xr.DataArray( @@ -4352,30 +2023,27 @@ def count( array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. """ - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="count", - dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def all( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -4383,7 +2051,8 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -4391,7 +2060,6 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4399,14 +2067,6 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`groupby` - User guide on groupby operations. - Examples -------- >>> da = xr.DataArray( @@ -4429,30 +2089,27 @@ def all( array([False, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="all", - dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def any( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -4460,7 +2117,8 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -4468,7 +2126,6 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4476,14 +2133,6 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`groupby` - User guide on groupby operations. - Examples -------- >>> da = xr.DataArray( @@ -4506,31 +2155,28 @@ def any( array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="any", - dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def max( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -4538,11 +2184,12 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. 
``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -4551,7 +2198,6 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4559,14 +2205,6 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`groupby` - User guide on groupby operations. - Examples -------- >>> da = xr.DataArray( @@ -4597,33 +2235,29 @@ def max( array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="max", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def min( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -4631,11 +2265,12 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -4644,7 +2279,6 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4652,14 +2286,6 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`groupby` - User guide on groupby operations. 
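A minimal sketch, assuming the ``da`` used throughout these groupby Examples: the grouped minimum is ``duck_array_ops.min`` applied per label, so

    >>> out = da.groupby("labels").min(skipna=False)

yields NaN for the ``'a'`` group (the ``nan, 2., 1.`` result shown in the Examples below) because that group contains the missing value, while the default ``skipna`` drops it before reducing.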
- Examples -------- >>> da = xr.DataArray( @@ -4690,33 +2316,29 @@ def min( array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="min", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -4724,11 +2346,12 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -4737,7 +2360,6 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4745,18 +2367,6 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -4787,34 +2397,30 @@ def mean( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="mean", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). 
@@ -4822,11 +2428,12 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -4841,7 +2448,6 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4849,18 +2455,6 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -4899,36 +2493,31 @@ def prod( array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="prod", - dim=dim, - skipna=skipna, - min_count=min_count, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -4936,11 +2525,12 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -4955,7 +2545,6 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -4963,18 +2552,6 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -5013,36 +2590,30 @@ def sum( array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="sum", - dim=dim, - skipna=skipna, - min_count=min_count, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def std( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -5050,15 +2621,13 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -5066,7 +2635,6 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5074,18 +2642,6 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -5117,43 +2673,28 @@ def std( Coordinates: * labels (labels) object 'a' 'b' 'c' - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.groupby("labels").std(skipna=True, ddof=1) - - array([ nan, 0. , 1.41421356]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + See Also + -------- + numpy.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. 
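A minimal sketch, assuming the ``da`` from the Examples block above: this path applies ``duck_array_ops.std`` per label, with ``skipna`` forwarded as documented, so

    >>> out = da.groupby("labels").std(skipna=False)

propagates NaN for the ``'a'`` group, which contains the missing value, instead of skipping it.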
""" - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="std", - dim=dim, - skipna=skipna, - ddof=ddof, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def var( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -5161,15 +2702,13 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -5177,7 +2716,6 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5185,18 +2723,6 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -5228,42 +2754,28 @@ def var( Coordinates: * labels (labels) object 'a' 'b' 'c' - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.groupby("labels").var(skipna=True, ddof=1) - - array([nan, 0., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + See Also + -------- + numpy.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. 
""" - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="var", - dim=dim, - skipna=skipna, - ddof=ddof, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def median( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -5271,11 +2783,12 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -5284,7 +2797,6 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5292,18 +2804,6 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -5334,6 +2834,13 @@ def median( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -5345,33 +2852,14 @@ def median( class DataArrayResampleReductions: - _obj: "DataArray" - - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> "DataArray": - raise NotImplementedError() - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> "DataArray": - raise NotImplementedError() + __slots__ = () def count( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -5379,7 +2867,8 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -5387,7 +2876,6 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5395,14 +2883,6 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.count - dask.array.count - DataArray.count - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -5425,30 +2905,27 @@ def count( array([1, 3, 1]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="count", - dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def all( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -5456,7 +2933,8 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -5464,7 +2942,6 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5472,14 +2949,6 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -5502,30 +2971,27 @@ def all( array([ True, True, False]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="all", - dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.all + DataArray.all + :ref:`resampling` + User guide on resampling operations. 
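A minimal sketch, assuming the boolean ``da`` built in the Examples above: the reduction is ``duck_array_ops.array_all`` per resampling bin, so

    >>> out = da.resample(time="3M").all()

is True for a bin only if every value falling into it is truthy, which is why the last bin in the example output above comes back ``False``.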
+ """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def any( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -5533,7 +2999,8 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -5541,7 +3008,6 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5549,14 +3015,6 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -5579,31 +3037,28 @@ def any( array([ True, True, True]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="any", - dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.any + DataArray.any + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def max( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -5611,11 +3066,12 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -5624,7 +3080,6 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -5632,14 +3087,6 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -5670,33 +3117,29 @@ def max( array([ 1., 3., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="max", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.max + DataArray.max + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def min( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -5704,11 +3147,12 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -5717,7 +3161,6 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5725,14 +3168,6 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`resampling` - User guide on resampling operations. - Examples -------- >>> da = xr.DataArray( @@ -5763,33 +3198,29 @@ def min( array([ 1., 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="min", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.min + DataArray.min + :ref:`resampling` + User guide on resampling operations. 
+ """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -5797,11 +3228,12 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -5810,7 +3242,6 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5818,18 +3249,6 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -5860,34 +3279,30 @@ def mean( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="mean", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -5895,11 +3310,12 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -5914,7 +3330,6 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5922,18 +3337,6 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -5972,36 +3375,31 @@ def prod( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="prod", - dim=dim, - skipna=skipna, - min_count=min_count, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -6009,11 +3407,12 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -6028,7 +3427,6 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -6036,18 +3434,6 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. 
- Examples -------- >>> da = xr.DataArray( @@ -6086,36 +3472,30 @@ def sum( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - """ - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="sum", - dim=dim, - skipna=skipna, - min_count=min_count, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + See Also + -------- + numpy.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def std( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -6123,15 +3503,13 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -6139,7 +3517,6 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -6147,18 +3524,6 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -6190,43 +3555,28 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.resample(time="3M").std(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + See Also + -------- + numpy.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. 
""" - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="std", - dim=dim, - skipna=skipna, - ddof=ddof, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def var( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, - ddof: int = 0, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -6234,15 +3584,13 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -6250,7 +3598,6 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -6258,18 +3605,6 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -6301,42 +3636,28 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.resample(time="3M").var(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + See Also + -------- + numpy.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. 
""" - - if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): - return self._flox_reduce( - func="var", - dim=dim, - skipna=skipna, - ddof=ddof, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def median( - self, + self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = None, + skipna: bool = True, keep_attrs: bool = None, **kwargs, - ) -> "DataArray": + ) -> T_DataArray: """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -6344,11 +3665,12 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, default: None + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been + have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -6357,7 +3679,6 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -6365,18 +3686,6 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - Examples -------- >>> da = xr.DataArray( @@ -6407,6 +3716,13 @@ def median( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.median, diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 9c9de76c0ed..54c9b857a7a 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -456,7 +456,7 @@ def cat( Strings or array-like of strings to concatenate elementwise with the current DataArray. sep : str or array-like of str, default: "". - Seperator to use between strings. + Separator to use between strings. It is broadcast in the same way as the other input strings. If array-like, its dimensions will be placed at the end of the output array dimensions. @@ -539,7 +539,7 @@ def join( Only one dimension is allowed at a time. Optional for 0D or 1D DataArrays, required for multidimensional DataArrays. sep : str or array-like, default: "". - Seperator to use between strings. + Separator to use between strings. It is broadcast in the same way as the other input strings. If array-like, its dimensions will be placed at the end of the output array dimensions. 
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index a53ac094253..f9342e2a82a 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -7,6 +7,7 @@ Any, Dict, Hashable, + Iterable, Mapping, Optional, Tuple, @@ -504,7 +505,7 @@ def reindex_variables( indexes: Mapping[Any, Index], indexers: Mapping, method: Optional[str] = None, - tolerance: Any = None, + tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, copy: bool = True, fill_value: Optional[Any] = dtypes.NA, sparse: bool = False, @@ -538,6 +539,10 @@ def reindex_variables( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return values is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 081b53391ba..d23a58522e6 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -135,7 +135,7 @@ def _infer_concat_order_from_coords(datasets): order = rank.astype(int).values - 1 # Append positions along extra dimension to structure which - # encodes the multi-dimensional concatentation order + # encodes the multi-dimensional concatenation order tile_ids = [ tile_id + (position,) for tile_id, position in zip(tile_ids, order) ] diff --git a/xarray/core/common.py b/xarray/core/common.py index 2300f3dd8f5..cb6da986892 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -8,16 +8,11 @@ TYPE_CHECKING, Any, Callable, - Dict, Hashable, Iterable, Iterator, - List, Mapping, - Optional, - Tuple, TypeVar, - Union, overload, ) @@ -172,9 +167,7 @@ def __iter__(self: Any) -> Iterator[Any]: raise TypeError("iteration over a 0-d array") return self._iter() - def get_axis_num( - self, dim: Union[Hashable, Iterable[Hashable]] - ) -> Union[int, Tuple[int, ...]]: + def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]: """Return axis number(s) corresponding to dimension(s) in this array. Parameters @@ -252,7 +245,7 @@ def __getattr__(self, name: str) -> Any: with suppress(KeyError): return source[name] raise AttributeError( - "{!r} object has no attribute {!r}".format(type(self).__name__, name) + f"{type(self).__name__!r} object has no attribute {name!r}" ) # This complicated two-method design boosts overall performance of simple operations @@ -292,37 +285,37 @@ def __setattr__(self, name: str, value: Any) -> None: "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." ) from e - def __dir__(self) -> List[str]: + def __dir__(self) -> list[str]: """Provide method name lookup and completion. Only provide 'public' methods. """ - extra_attrs = set( + extra_attrs = { item for source in self._attr_sources for item in source if isinstance(item, str) - ) + } return sorted(set(dir(type(self))) | extra_attrs) - def _ipython_key_completions_(self) -> List[str]: + def _ipython_key_completions_(self) -> list[str]: """Provide method for the key-autocompletions in IPython. See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion For the details. 
""" - items = set( + items = { item for source in self._item_sources for item in source if isinstance(item, str) - ) + } return list(items) def get_squeeze_dims( xarray_obj, - dim: Union[Hashable, Iterable[Hashable], None] = None, - axis: Union[int, Iterable[int], None] = None, -) -> List[Hashable]: + dim: Hashable | Iterable[Hashable] | None = None, + axis: int | Iterable[int] | None = None, +) -> list[Hashable]: """Get a list of dimensions to squeeze out.""" if dim is not None and axis is not None: raise ValueError("cannot use both parameters `axis` and `dim`") @@ -354,15 +347,15 @@ def get_squeeze_dims( class DataWithCoords(AttrAccessMixin): """Shared base class for Dataset and DataArray.""" - _close: Optional[Callable[[], None]] + _close: Callable[[], None] | None __slots__ = ("_close",) def squeeze( self, - dim: Union[Hashable, Iterable[Hashable], None] = None, + dim: Hashable | Iterable[Hashable] | None = None, drop: bool = False, - axis: Union[int, Iterable[int], None] = None, + axis: int | Iterable[int] | None = None, ): """Return a new object with squeezed data. @@ -424,8 +417,8 @@ def get_index(self, key: Hashable) -> pd.Index: return pd.Index(range(self.sizes[key]), name=key) def _calc_assign_results( - self: C, kwargs: Mapping[Any, Union[T, Callable[[C], T]]] - ) -> Dict[Hashable, T]: + self: C, kwargs: Mapping[Any, T | Callable[[C], T]] + ) -> dict[Hashable, T]: return {k: v(self) if callable(v) else v for k, v in kwargs.items()} def assign_coords(self, coords=None, **coords_kwargs): @@ -543,7 +536,7 @@ def assign_attrs(self, *args, **kwargs): def pipe( self, - func: Union[Callable[..., T], Tuple[Callable[..., T], str]], + func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs, ) -> T: @@ -810,7 +803,7 @@ def groupby_bins( }, ) - def weighted(self: T_DataWithCoords, weights: "DataArray") -> Weighted[T_Xarray]: + def weighted(self: T_DataWithCoords, weights: DataArray) -> Weighted[T_Xarray]: """ Weighted operations. @@ -833,7 +826,7 @@ def rolling( self, dim: Mapping[Any, int] = None, min_periods: int = None, - center: Union[bool, Mapping[Any, bool]] = False, + center: bool | Mapping[Any, bool] = False, **window_kwargs: int, ): """ @@ -868,7 +861,7 @@ def rolling( ... np.linspace(0, 11, num=12), ... coords=[ ... pd.date_range( - ... "15/12/1999", + ... "1999-12-15", ... periods=12, ... freq=pd.DateOffset(months=1), ... ) @@ -948,7 +941,7 @@ def coarsen( self, dim: Mapping[Any, int] = None, boundary: str = "exact", - side: Union[str, Mapping[Any, str]] = "left", + side: str | Mapping[Any, str] = "left", coord_func: str = "mean", **window_kwargs: int, ): @@ -981,7 +974,7 @@ def coarsen( >>> da = xr.DataArray( ... np.linspace(0, 364, num=364), ... dims="time", - ... coords={"time": pd.date_range("15/12/1999", periods=364)}, + ... coords={"time": pd.date_range("1999-12-15", periods=364)}, ... ) >>> da # +doctest: ELLIPSIS @@ -1077,7 +1070,7 @@ def resample( ... np.linspace(0, 11, num=12), ... coords=[ ... pd.date_range( - ... "15/12/1999", + ... "1999-12-15", ... periods=12, ... freq=pd.DateOffset(months=1), ... ) @@ -1298,7 +1291,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): return ops.where_method(self, cond, other) - def set_close(self, close: Optional[Callable[[], None]]) -> None: + def set_close(self, close: Callable[[], None] | None) -> None: """Register the function that releases any resources linked to this object. 
This method controls how xarray cleans up resources associated @@ -1531,20 +1524,20 @@ def __getitem__(self, value): @overload def full_like( - other: "Dataset", + other: Dataset, fill_value, - dtype: Union[DTypeLike, Mapping[Any, DTypeLike]] = None, -) -> "Dataset": + dtype: DTypeLike | Mapping[Any, DTypeLike] = None, +) -> Dataset: ... @overload -def full_like(other: "DataArray", fill_value, dtype: DTypeLike = None) -> "DataArray": +def full_like(other: DataArray, fill_value, dtype: DTypeLike = None) -> DataArray: ... @overload -def full_like(other: "Variable", fill_value, dtype: DTypeLike = None) -> "Variable": +def full_like(other: Variable, fill_value, dtype: DTypeLike = None) -> Variable: ... @@ -1823,9 +1816,9 @@ def ones_like(other, dtype: DTypeLike = None): def get_chunksizes( variables: Iterable[Variable], -) -> Mapping[Any, Tuple[int, ...]]: +) -> Mapping[Any, tuple[int, ...]]: - chunks: Dict[Any, Tuple[int, ...]] = {} + chunks: dict[Any, tuple[int, ...]] = {} for v in variables: if hasattr(v.data, "chunks"): for dim, c in v.chunksizes.items(): diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 9fe93c88734..ce37251576a 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -13,15 +13,10 @@ AbstractSet, Any, Callable, - Dict, Hashable, Iterable, - List, Mapping, - Optional, Sequence, - Tuple, - Union, ) import numpy as np @@ -197,7 +192,7 @@ def result_name(objects: list) -> Any: return name -def _get_coords_list(args) -> List[Coordinates]: +def _get_coords_list(args) -> list[Coordinates]: coords_list = [] for arg in args: try: @@ -214,7 +209,7 @@ def build_output_coords( signature: _UFuncSignature, exclude_dims: AbstractSet = frozenset(), combine_attrs: str = "override", -) -> "List[Dict[Any, Variable]]": +) -> list[dict[Any, Variable]]: """Build output coordinates for an operation. Parameters @@ -309,11 +304,11 @@ def apply_dataarray_vfunc( return out -def ordered_set_union(all_keys: List[Iterable]) -> Iterable: +def ordered_set_union(all_keys: list[Iterable]) -> Iterable: return {key: None for keys in all_keys for key in keys}.keys() -def ordered_set_intersection(all_keys: List[Iterable]) -> Iterable: +def ordered_set_intersection(all_keys: list[Iterable]) -> Iterable: intersection = set(all_keys[0]) for keys in all_keys[1:]: intersection.intersection_update(keys) @@ -331,7 +326,7 @@ def assert_and_return_exact_match(all_keys): return first_keys -_JOINERS: Dict[str, Callable] = { +_JOINERS: dict[str, Callable] = { "inner": ordered_set_intersection, "outer": ordered_set_union, "left": operator.itemgetter(0), @@ -340,17 +335,15 @@ def assert_and_return_exact_match(all_keys): } -def join_dict_keys( - objects: Iterable[Union[Mapping, Any]], how: str = "inner" -) -> Iterable: +def join_dict_keys(objects: Iterable[Mapping | Any], how: str = "inner") -> Iterable: joiner = _JOINERS[how] all_keys = [obj.keys() for obj in objects if hasattr(obj, "keys")] return joiner(all_keys) def collect_dict_values( - objects: Iterable[Union[Mapping, Any]], keys: Iterable, fill_value: object = None -) -> List[list]: + objects: Iterable[Mapping | Any], keys: Iterable, fill_value: object = None +) -> list[list]: return [ [obj.get(key, fill_value) if is_dict_like(obj) else obj for obj in objects] for key in keys @@ -368,9 +361,9 @@ def _as_variables_or_variable(arg): def _unpack_dict_tuples( - result_vars: Mapping[Any, Tuple[Variable, ...]], num_outputs: int -) -> Tuple[Dict[Hashable, Variable], ...]: - out: Tuple[Dict[Hashable, Variable], ...] 
= tuple({} for _ in range(num_outputs)) + result_vars: Mapping[Any, tuple[Variable, ...]], num_outputs: int +) -> tuple[dict[Hashable, Variable], ...]: + out: tuple[dict[Hashable, Variable], ...] = tuple({} for _ in range(num_outputs)) for name, values in result_vars.items(): for value, results_dict in zip(values, out): results_dict[name] = value @@ -398,7 +391,7 @@ def apply_dict_of_variables_vfunc( def _fast_dataset( - variables: Dict[Hashable, Variable], coord_variables: Mapping[Hashable, Variable] + variables: dict[Hashable, Variable], coord_variables: Mapping[Hashable, Variable] ) -> Dataset: """Create a dataset as quickly as possible. @@ -528,9 +521,9 @@ def apply_groupby_func(func, *args): def unified_dim_sizes( variables: Iterable[Variable], exclude_dims: AbstractSet = frozenset() -) -> Dict[Hashable, int]: +) -> dict[Hashable, int]: - dim_sizes: Dict[Hashable, int] = {} + dim_sizes: dict[Hashable, int] = {} for var in variables: if len(set(var.dims)) < len(var.dims): @@ -556,8 +549,8 @@ def unified_dim_sizes( def broadcast_compat_data( variable: Variable, - broadcast_dims: Tuple[Hashable, ...], - core_dims: Tuple[Hashable, ...], + broadcast_dims: tuple[Hashable, ...], + core_dims: tuple[Hashable, ...], ) -> Any: data = variable.data @@ -595,7 +588,7 @@ def broadcast_compat_data( data = duck_array_ops.transpose(data, order) if new_dims != reordered_dims: - key_parts: List[Optional[slice]] = [] + key_parts: list[slice | None] = [] for dim in new_dims: if dim in set_old_dims: key_parts.append(SLICE_NONE) @@ -810,19 +803,19 @@ def apply_ufunc( func: Callable, *args: Any, input_core_dims: Sequence[Sequence] = None, - output_core_dims: Optional[Sequence[Sequence]] = ((),), + output_core_dims: Sequence[Sequence] | None = ((),), exclude_dims: AbstractSet = frozenset(), vectorize: bool = False, join: str = "exact", dataset_join: str = "exact", dataset_fill_value: object = _NO_FILL_VALUE, - keep_attrs: Union[bool, str] = None, - kwargs: Mapping = None, + keep_attrs: bool | str | None = None, + kwargs: Mapping | None = None, dask: str = "forbidden", - output_dtypes: Sequence = None, - output_sizes: Mapping[Any, int] = None, + output_dtypes: Sequence | None = None, + output_sizes: Mapping[Any, int] | None = None, meta: Any = None, - dask_gufunc_kwargs: Dict[str, Any] = None, + dask_gufunc_kwargs: dict[str, Any] | None = None, ) -> Any: """Apply a vectorized function for unlabeled arrays on xarray objects. @@ -951,7 +944,7 @@ def apply_ufunc( Calculate the vector magnitude of two arguments: >>> def magnitude(a, b): - ... func = lambda x, y: np.sqrt(x ** 2 + y ** 2) + ... func = lambda x, y: np.sqrt(x**2 + y**2) ... return xr.apply_ufunc(func, a, b) ... @@ -1051,8 +1044,8 @@ def apply_ufunc( References ---------- - .. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html - .. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html + .. [1] https://numpy.org/doc/stable/reference/ufuncs.html + .. [2] https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html """ from .dataarray import DataArray from .groupby import GroupBy @@ -1375,8 +1368,8 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): def cross( - a: Union[DataArray, Variable], b: Union[DataArray, Variable], *, dim: Hashable -) -> Union[DataArray, Variable]: + a: DataArray | Variable, b: DataArray | Variable, *, dim: Hashable +) -> DataArray | Variable: """ Compute the cross product of two (arrays of) vectors. 
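Note: the signature rewrites in the ``common.py`` and ``computation.py`` hunks above all follow one pattern: with ``from __future__ import annotations`` in effect (the import is visible at the top of ``concat.py`` below), ``Union``/``Optional``/``List``/``Dict``/``Tuple`` can be replaced by PEP 604 unions and built-in generics, because annotations are kept as strings and not evaluated at import time. A small sketch of the equivalence, using ``get_axis_num`` from the ``common.py`` hunk as the example:

from __future__ import annotations

from typing import Hashable, Iterable, Tuple, Union


class Example:
    # old spelling, requiring the typing aliases:
    def get_axis_num_old(
        self, dim: Union[Hashable, Iterable[Hashable]]
    ) -> Union[int, Tuple[int, ...]]:
        ...

    # new spelling used throughout these hunks; the future import keeps the
    # annotations as strings, so the syntax is accepted on Python 3.7+:
    def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]:
        ...
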
@@ -1727,7 +1720,7 @@ def dot(*arrays, dims=None, **kwargs): return result.transpose(*all_dims, missing_dims="ignore") -def where(cond, x, y): +def where(cond, x, y, keep_attrs=None): """Return elements from `x` or `y` depending on `cond`. Performs xarray-like broadcasting across input arguments. @@ -1743,6 +1736,8 @@ def where(cond, x, y): values to choose from where `cond` is True y : scalar, array, Variable, DataArray or Dataset values to choose from where `cond` is False + keep_attrs : bool or str or callable, optional + How to treat attrs. If True, keep the attrs of `x`. Returns ------- @@ -1808,6 +1803,14 @@ def where(cond, x, y): Dataset.where, DataArray.where : equivalent methods """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + + if keep_attrs is True: + # keep the attributes of x, the second parameter, by default to + # be consistent with the `where` method of `DataArray` and `Dataset` + keep_attrs = lambda attrs, context: attrs[1] + # alignment for three arguments is complicated, so don't support it yet return apply_ufunc( duck_array_ops.where, @@ -1817,6 +1820,7 @@ def where(cond, x, y): join="exact", dataset_join="exact", dask="allowed", + keep_attrs=keep_attrs, ) @@ -1915,7 +1919,7 @@ def _calc_idxminmax( return res -def unify_chunks(*objects: T_Xarray) -> Tuple[T_Xarray, ...]: +def unify_chunks(*objects: T_Xarray) -> tuple[T_Xarray, ...]: """ Given any number of Dataset and/or DataArray objects, returns new objects with unified chunk size along all chunked dimensions. @@ -1937,7 +1941,7 @@ def unify_chunks(*objects: T_Xarray) -> Tuple[T_Xarray, ...]: for obj in objects ] - # Get argumets to pass into dask.array.core.unify_chunks + # Get arguments to pass into dask.array.core.unify_chunks unify_chunks_args = [] sizes: dict[Hashable, int] = {} for ds in datasets: diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 7ead1918e1a..1e6e246322e 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,18 +1,6 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Dict, - Hashable, - Iterable, - List, - Literal, - Optional, - Set, - Tuple, - Union, - overload, -) +from typing import TYPE_CHECKING, Hashable, Iterable, Literal, overload import pandas as pd @@ -35,31 +23,31 @@ @overload def concat( - objs: Iterable["Dataset"], - dim: Hashable | "DataArray" | pd.Index, - data_vars: concat_options | List[Hashable] = "all", - coords: concat_options | List[Hashable] = "different", + objs: Iterable[Dataset], + dim: Hashable | DataArray | pd.Index, + data_vars: concat_options | list[Hashable] = "all", + coords: concat_options | list[Hashable] = "different", compat: compat_options = "equals", - positions: Optional[Iterable[int]] = None, + positions: Iterable[int] | None = None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: ... @overload def concat( - objs: Iterable["DataArray"], - dim: Hashable | "DataArray" | pd.Index, - data_vars: concat_options | List[Hashable] = "all", - coords: concat_options | List[Hashable] = "different", + objs: Iterable[DataArray], + dim: Hashable | DataArray | pd.Index, + data_vars: concat_options | list[Hashable] = "all", + coords: concat_options | list[Hashable] = "different", compat: compat_options = "equals", - positions: Optional[Iterable[int]] = None, + positions: Iterable[int] | None = None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "DataArray": +) -> DataArray: ... 
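Note: the ``computation.py`` hunk above adds a ``keep_attrs`` argument to the top-level ``xr.where``; it defaults to the global ``keep_attrs`` option and, when ``True``, keeps the attrs of ``x`` to match ``DataArray.where``/``Dataset.where``. A minimal sketch of the resulting call pattern (array values and attrs are illustrative):

import numpy as np
import xarray as xr

x = xr.DataArray(np.arange(4.0), dims="lat", attrs={"units": "degC"})
y = xr.full_like(x, -1.0)

xr.where(x > 1, x, y)                   # attrs follow the global option (off by default)
xr.where(x > 1, x, y, keep_attrs=True)  # result carries x.attrs, i.e. {"units": "degC"}
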
@@ -394,14 +382,14 @@ def process_subset_opt(opt, subset): # determine dimensional coordinate names and a dict mapping name to DataArray def _parse_datasets( - datasets: Iterable["Dataset"], -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, int], Set[Hashable], Set[Hashable]]: + datasets: Iterable[Dataset], +) -> tuple[dict[Hashable, Variable], dict[Hashable, int], set[Hashable], set[Hashable]]: - dims: Set[Hashable] = set() - all_coord_names: Set[Hashable] = set() - data_vars: Set[Hashable] = set() # list of data_vars - dim_coords: Dict[Hashable, Variable] = {} # maps dim name to variable - dims_sizes: Dict[Hashable, int] = {} # shared dimension sizes to expand variables + dims: set[Hashable] = set() + all_coord_names: set[Hashable] = set() + data_vars: set[Hashable] = set() # list of data_vars + dim_coords: dict[Hashable, Variable] = {} # maps dim name to variable + dims_sizes: dict[Hashable, int] = {} # shared dimension sizes to expand variables for ds in datasets: dims_sizes.update(ds.dims) @@ -421,16 +409,16 @@ def _parse_datasets( def _dataset_concat( - datasets: List["Dataset"], - dim: Union[str, "DataArray", pd.Index], - data_vars: Union[str, List[str]], - coords: Union[str, List[str]], + datasets: list[Dataset], + dim: str | DataArray | pd.Index, + data_vars: str | list[str], + coords: str | list[str], compat: str, - positions: Optional[Iterable[int]], + positions: Iterable[int] | None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: """ Concatenate a sequence of datasets along a new or existing dimension """ @@ -467,7 +455,7 @@ def _dataset_concat( if (dim in coord_names or dim in data_names) and dim not in dim_names: datasets = [ds.expand_dims(dim) for ds in datasets] - # determine which variables to concatentate + # determine which variables to concatenate concat_over, equals, concat_dim_lengths = _calc_concat_over( datasets, dim, dim_names, data_vars, coords, compat ) @@ -477,7 +465,7 @@ def _dataset_concat( result_vars = {} if variables_to_merge: - to_merge: Dict[Hashable, List[Variable]] = { + to_merge: dict[Hashable, list[Variable]] = { var: [] for var in variables_to_merge } @@ -552,16 +540,16 @@ def ensure_common_dims(vars): def _dataarray_concat( - arrays: Iterable["DataArray"], - dim: Union[str, "DataArray", pd.Index], - data_vars: Union[str, List[str]], - coords: Union[str, List[str]], + arrays: Iterable[DataArray], + dim: str | DataArray | pd.Index, + data_vars: str | list[str], + coords: str | list[str], compat: str, - positions: Optional[Iterable[int]], + positions: Iterable[int] | None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "DataArray": +) -> DataArray: from .dataarray import DataArray arrays = list(arrays) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index de8375bf721..0e0229cc3ca 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -179,8 +179,5 @@ def sliding_window_view(x, window_shape, axis=None): window_shape=window_shape, axis=axis, ) - # map_overlap's signature changed in https://github.com/dask/dask/pull/6165 - if dask_version > Version("2.18.0"): - return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) - else: - return map_overlap(x, _np_sliding_window_view, **kwargs) + + return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 
cb0e3c21e45..d7c3fd9bab7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6,16 +6,11 @@ TYPE_CHECKING, Any, Callable, - Dict, Hashable, Iterable, - List, Literal, Mapping, - Optional, Sequence, - Tuple, - Union, cast, ) @@ -59,6 +54,7 @@ from .indexes import Index, Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .utils import ( Default, @@ -94,7 +90,7 @@ def _infer_coords_and_dims( shape, coords, dims -) -> "Tuple[Dict[Any, Variable], Tuple[Hashable, ...]]": +) -> tuple[dict[Any, Variable], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" if ( @@ -132,7 +128,7 @@ def _infer_coords_and_dims( if not isinstance(d, str): raise TypeError(f"dimension {d} is not a string") - new_coords: Dict[Any, Variable] = {} + new_coords: dict[Any, Variable] = {} if utils.is_dict_like(coords): for k, v in coords.items(): @@ -193,10 +189,10 @@ def _check_data_shape(data, coords, dims): class _LocIndexer: __slots__ = ("data_array",) - def __init__(self, data_array: "DataArray"): + def __init__(self, data_array: DataArray): self.data_array = data_array - def __getitem__(self, key) -> "DataArray": + def __getitem__(self, key) -> DataArray: if not utils.is_dict_like(key): # expand the indexer so we can handle Ellipsis labels = indexing.expanded_indexer(key, self.data_array.ndim) @@ -344,11 +340,11 @@ class DataArray( units: degC """ - _cache: Dict[str, Any] - _coords: Dict[Any, Variable] - _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, Index]] - _name: Optional[Hashable] + _cache: dict[str, Any] + _coords: dict[Any, Variable] + _close: Callable[[], None] | None + _indexes: dict[Hashable, Index] | None + _name: Hashable | None _variable: Variable __slots__ = ( @@ -372,12 +368,12 @@ class DataArray( def __init__( self, data: Any = dtypes.NA, - coords: Union[Sequence[Tuple], Mapping[Any, Any], None] = None, - dims: Union[Hashable, Sequence[Hashable], None] = None, + coords: Sequence[tuple] | Mapping[Any, Any] | None = None, + dims: Hashable | Sequence[Hashable] | None = None, name: Hashable = None, attrs: Mapping = None, # internal parameters - indexes: Dict[Hashable, pd.Index] = None, + indexes: dict[Hashable, pd.Index] = None, fastpath: bool = False, ): if fastpath: @@ -428,7 +424,7 @@ def _replace( self: T_DataArray, variable: Variable = None, coords=None, - name: Union[Hashable, None, Default] = _default, + name: Hashable | None | Default = _default, indexes=None, ) -> T_DataArray: if variable is None: @@ -440,8 +436,8 @@ def _replace( return type(self)(variable, coords, name=name, fastpath=True, indexes=indexes) def _replace_maybe_drop_dims( - self, variable: Variable, name: Union[Hashable, None, Default] = _default - ) -> "DataArray": + self, variable: Variable, name: Hashable | None | Default = _default + ) -> DataArray: if variable.dims == self.dims and variable.shape == self.shape: coords = self._coords.copy() indexes = self._indexes @@ -467,7 +463,7 @@ def _replace_maybe_drop_dims( ) return self._replace(variable, coords, name, indexes=indexes) - def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> "DataArray": + def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> DataArray: if not len(indexes): return self coords = self._coords.copy() @@ -476,7 +472,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> 
"DataArray": obj = self._replace(coords=coords) # switch from dimension to level names, if necessary - dim_names: Dict[Any, str] = {} + dim_names: dict[Any, str] = {} for dim, idx in indexes.items(): pd_idx = idx.to_pandas_index() if not isinstance(idx, pd.MultiIndex) and pd_idx.name != dim: @@ -489,8 +485,8 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Union[Hashable, None, Default] = _default - ) -> "DataArray": + self, dataset: Dataset, name: Hashable | None | Default = _default + ) -> DataArray: variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables indexes = dataset._indexes @@ -583,12 +579,12 @@ def to_dataset( return result @property - def name(self) -> Optional[Hashable]: + def name(self) -> Hashable | None: """The name of this array.""" return self._name @name.setter - def name(self, value: Optional[Hashable]) -> None: + def name(self, value: Hashable | None) -> None: self._name = value @property @@ -601,7 +597,7 @@ def dtype(self) -> np.dtype: return self.variable.dtype @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> tuple[int, ...]: return self.variable.shape @property @@ -690,7 +686,7 @@ def to_index(self) -> pd.Index: return self.variable.to_index() @property - def dims(self) -> Tuple[Hashable, ...]: + def dims(self) -> tuple[Hashable, ...]: """Tuple of dimension names associated with this array. Note that the type of this property is inconsistent with @@ -713,11 +709,11 @@ def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: return dict(zip(self.dims, key)) @property - def _level_coords(self) -> Dict[Hashable, Hashable]: + def _level_coords(self) -> dict[Hashable, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding coordinate name. """ - level_coords: Dict[Hashable, Hashable] = {} + level_coords: dict[Hashable, Hashable] = {} for cname, var in self._coords.items(): if var.ndim == 1 and isinstance(var, IndexVariable): @@ -740,7 +736,7 @@ def _getitem_coord(self, key): return self._replace_maybe_drop_dims(var, name=key) - def __getitem__(self, key: Any) -> "DataArray": + def __getitem__(self, key: Any) -> DataArray: if isinstance(key, str): return self._getitem_coord(key) else: @@ -793,7 +789,7 @@ def loc(self) -> _LocIndexer: @property # Key type needs to be `Any` because of mypy#4167 - def attrs(self) -> Dict[Any, Any]: + def attrs(self) -> dict[Any, Any]: """Dictionary storing arbitrary metadata with this array.""" return self.variable.attrs @@ -803,7 +799,7 @@ def attrs(self, value: Mapping[Any, Any]) -> None: self.variable.attrs = value # type: ignore[assignment] @property - def encoding(self) -> Dict[Hashable, Any]: + def encoding(self) -> dict[Hashable, Any]: """Dictionary of format-specific settings for how this array should be serialized.""" return self.variable.encoding @@ -840,9 +836,9 @@ def coords(self) -> DataArrayCoordinates: def reset_coords( self, - names: Union[Iterable[Hashable], Hashable, None] = None, + names: Iterable[Hashable] | Hashable | None = None, drop: bool = False, - ) -> Union[None, "DataArray", Dataset]: + ) -> None | DataArray | Dataset: """Given names of coordinates, reset them to become variables. 
Parameters @@ -907,7 +903,7 @@ def _dask_finalize(results, name, func, *args, **kwargs): coords = ds._variables return DataArray(variable, coords, name=name, fastpath=True) - def load(self, **kwargs) -> "DataArray": + def load(self, **kwargs) -> DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return this array. @@ -931,7 +927,7 @@ def load(self, **kwargs) -> "DataArray": self._coords = new._coords return self - def compute(self, **kwargs) -> "DataArray": + def compute(self, **kwargs) -> DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return a new array. The original is left unaltered. @@ -953,7 +949,7 @@ def compute(self, **kwargs) -> "DataArray": new = self.copy(deep=False) return new.load(**kwargs) - def persist(self, **kwargs) -> "DataArray": + def persist(self, **kwargs) -> DataArray: """Trigger computation in constituent dask arrays This keeps them as dask arrays but encourages them to keep data in @@ -1048,10 +1044,10 @@ def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()} return self._replace(variable, coords, indexes=indexes) - def __copy__(self) -> "DataArray": + def __copy__(self) -> DataArray: return self.copy(deep=False) - def __deepcopy__(self, memo=None) -> "DataArray": + def __deepcopy__(self, memo=None) -> DataArray: # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) @@ -1061,7 +1057,7 @@ def __deepcopy__(self, memo=None) -> "DataArray": __hash__ = None # type: ignore[assignment] @property - def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: + def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ Tuple of block lengths for this dataarray's data, in order of dimensions, or None if the underlying data is not a dask array. @@ -1075,7 +1071,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: return self.variable.chunks @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataarray's data, or None if the underlying data is not a dask array. @@ -1095,17 +1091,17 @@ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: def chunk( self, - chunks: Union[ - int, - Literal["auto"], - Tuple[int, ...], - Tuple[Tuple[int, ...], ...], - Mapping[Any, Union[None, int, Tuple[int, ...]]], - ] = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) + chunks: ( + int + | Literal["auto"] + | tuple[int, ...] + | tuple[tuple[int, ...], ...] + | Mapping[Any, None | int | tuple[int, ...]] + ) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", token: str = None, lock: bool = False, - ) -> "DataArray": + ) -> DataArray: """Coerce this array's data into a dask arrays with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1147,7 +1143,7 @@ def isel( drop: bool = False, missing_dims: str = "raise", **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by integer indexing along the specified dimension(s). 
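Note: the ``dataarray.py`` hunks above only modernise the annotations on ``chunks``, ``chunksizes`` and ``chunk``; behaviour is unchanged. A short sketch of the distinction those docstrings draw (requires dask; shape and chunk sizes are illustrative):

import numpy as np
import xarray as xr

da = xr.DataArray(np.zeros((4, 6)), dims=("x", "y")).chunk({"x": 2, "y": 3})

da.chunks      # ((2, 2), (3, 3))           -- block lengths in dimension order
da.chunksizes  # {"x": (2, 2), "y": (3, 3)} -- block lengths keyed by dimension name
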
@@ -1231,7 +1227,7 @@ def sel( tolerance=None, drop: bool = False, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by selecting index labels along the specified dimension(s). @@ -1344,9 +1340,9 @@ def sel( def head( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by the the first `n` values along the specified dimension(s). Default `n` = 5 @@ -1361,9 +1357,9 @@ def head( def tail( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by the the last `n` values along the specified dimension(s). Default `n` = 5 @@ -1378,9 +1374,9 @@ def tail( def thin( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by each `n` value along the specified dimension(s). @@ -1394,8 +1390,8 @@ def thin( return self._from_temp_dataset(ds) def broadcast_like( - self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None - ) -> "DataArray": + self, other: DataArray | Dataset, exclude: Iterable[Hashable] | None = None + ) -> DataArray: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -1469,12 +1465,12 @@ def broadcast_like( def reindex_like( self, - other: Union["DataArray", Dataset], - method: str = None, - tolerance=None, + other: DataArray | Dataset, + method: str | None = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, - ) -> "DataArray": + ) -> DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -1499,6 +1495,10 @@ def reindex_like( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -1533,11 +1533,11 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance=None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -1566,6 +1566,10 @@ def reindex( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. 
List-like must be the same size as the index and its dtype + must exactly match the index’s type. fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill values. Use this @@ -1629,7 +1633,7 @@ def interp( assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, **coords_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Multidimensional interpolation of variables. Parameters @@ -1754,11 +1758,11 @@ def interp( def interp_like( self, - other: Union["DataArray", Dataset], + other: DataArray | Dataset, method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, - ) -> "DataArray": + ) -> DataArray: """Interpolate this object onto the coordinates of another object, filling out of range values with NaN. @@ -1810,9 +1814,9 @@ def interp_like( def rename( self, - new_name_or_name_dict: Union[Hashable, Mapping[Any, Hashable]] = None, + new_name_or_name_dict: Hashable | Mapping[Any, Hashable] = None, **names: Hashable, - ) -> "DataArray": + ) -> DataArray: """Returns a new DataArray with renamed coordinates or a new name. Parameters @@ -1849,7 +1853,7 @@ def rename( def swap_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs - ) -> "DataArray": + ) -> DataArray: """Returns a new DataArray with swapped dimensions. Parameters @@ -1906,10 +1910,10 @@ def swap_dims( def expand_dims( self, - dim: Union[None, Hashable, Sequence[Hashable], Mapping[Any, Any]] = None, + dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, axis=None, **dim_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new object with an additional axis (or axes) inserted at the corresponding position in the array shape. The new object is a view into the underlying array, not a copy. @@ -1958,10 +1962,10 @@ def expand_dims( def set_index( self, - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, append: bool = False, - **indexes_kwargs: Union[Hashable, Sequence[Hashable]], - ) -> "DataArray": + **indexes_kwargs: Hashable | Sequence[Hashable], + ) -> DataArray: """Set DataArray (multi-)indexes using one or more existing coordinates. @@ -2015,9 +2019,9 @@ def set_index( def reset_index( self, - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], drop: bool = False, - ) -> "DataArray": + ) -> DataArray: """Reset the specified index(es) or multi-index level(s). Parameters @@ -2048,7 +2052,7 @@ def reorder_levels( self, dim_order: Mapping[Any, Sequence[int]] = None, **dim_order_kwargs: Sequence[int], - ) -> "DataArray": + ) -> DataArray: """Rearrange index levels using input order. Parameters @@ -2083,7 +2087,7 @@ def stack( self, dimensions: Mapping[Any, Sequence[Hashable]] = None, **dimensions_kwargs: Sequence[Hashable], - ) -> "DataArray": + ) -> DataArray: """ Stack any number of existing dimensions into a single new dimension. @@ -2139,10 +2143,10 @@ def stack( def unstack( self, - dim: Union[Hashable, Sequence[Hashable], None] = None, + dim: Hashable | Sequence[Hashable] | None = None, fill_value: Any = dtypes.NA, sparse: bool = False, - ) -> "DataArray": + ) -> DataArray: """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. 
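Note: ``reindex``/``reindex_like`` above gain both a wider ``tolerance`` annotation and a docstring note that tolerance may be scalar or list-like. A minimal sketch of the two forms (labels and tolerances are illustrative; the list-like form must follow the size and dtype rules quoted in the docstring):

import xarray as xr

da = xr.DataArray([10.0, 20.0, 30.0], dims="x", coords={"x": [0.0, 1.0, 2.0]})

# one tolerance applied to every label
da.reindex(x=[0.1, 0.9, 2.5], method="nearest", tolerance=0.2)

# a per-label tolerance, one entry per value being matched
da.reindex(x=[0.1, 0.9, 2.5], method="nearest", tolerance=[0.2, 0.05, 1.0])
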
@@ -2273,7 +2277,7 @@ def transpose( *dims: Hashable, transpose_coords: bool = True, missing_dims: str = "raise", - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray object with transposed dimensions. Parameters @@ -2310,7 +2314,7 @@ def transpose( dims = tuple(utils.infix_dims(dims, self.dims, missing_dims)) variable = self.variable.transpose(*dims) if transpose_coords: - coords: Dict[Hashable, Variable] = {} + coords: dict[Hashable, Variable] = {} for name, coord in self.coords.items(): coord_dims = tuple(dim for dim in dims if dim in coord.dims) coords[name] = coord.variable.transpose(*coord_dims) @@ -2319,12 +2323,12 @@ def transpose( return self._replace(variable) @property - def T(self) -> "DataArray": + def T(self) -> DataArray: return self.transpose() def drop_vars( - self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "DataArray": + self, names: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> DataArray: """Returns an array with dropped variables. Parameters @@ -2351,7 +2355,7 @@ def drop( *, errors: str = "raise", **labels_kwargs, - ) -> "DataArray": + ) -> DataArray: """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged @@ -2370,7 +2374,7 @@ def drop_sel( *, errors: str = "raise", **labels_kwargs, - ) -> "DataArray": + ) -> DataArray: """Drop index labels from this DataArray. Parameters @@ -2417,9 +2421,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs): dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs) return self._from_temp_dataset(dataset) - def dropna( - self, dim: Hashable, how: str = "any", thresh: int = None - ) -> "DataArray": + def dropna(self, dim: Hashable, how: str = "any", thresh: int = None) -> DataArray: """Returns a new array with dropped labels for missing values along the provided dimension. @@ -2441,7 +2443,7 @@ def dropna( ds = self._to_temp_dataset().dropna(dim, how=how, thresh=thresh) return self._from_temp_dataset(ds) - def fillna(self, value: Any) -> "DataArray": + def fillna(self, value: Any) -> DataArray: """Fill missing values in this object. This operation follows the normal broadcasting and alignment rules that @@ -2473,13 +2475,13 @@ def interpolate_na( dim: Hashable = None, method: str = "linear", limit: int = None, - use_coordinate: Union[bool, str] = True, - max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta - ] = None, + use_coordinate: bool | str = True, + max_gap: ( + int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta + ) = None, keep_attrs: bool = None, **kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Fill in NaNs by interpolating according to different methods. 
Parameters @@ -2584,8 +2586,8 @@ def interpolate_na( **kwargs, ) - def ffill(self, dim: Hashable, limit: int = None) -> "DataArray": - """Fill NaN values by propogating values forward + def ffill(self, dim: Hashable, limit: int = None) -> DataArray: + """Fill NaN values by propagating values forward *Requires bottleneck.* @@ -2609,8 +2611,8 @@ def ffill(self, dim: Hashable, limit: int = None) -> "DataArray": return ffill(self, dim, limit=limit) - def bfill(self, dim: Hashable, limit: int = None) -> "DataArray": - """Fill NaN values by propogating values backward + def bfill(self, dim: Hashable, limit: int = None) -> DataArray: + """Fill NaN values by propagating values backward *Requires bottleneck.* @@ -2634,7 +2636,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "DataArray": return bfill(self, dim, limit=limit) - def combine_first(self, other: "DataArray") -> "DataArray": + def combine_first(self, other: DataArray) -> DataArray: """Combine two DataArray objects, with union of coordinates. This operation follows the normal broadcasting and alignment rules of @@ -2655,13 +2657,13 @@ def combine_first(self, other: "DataArray") -> "DataArray": def reduce( self, func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, + dim: None | Hashable | Sequence[Hashable] = None, *, - axis: Union[None, int, Sequence[int]] = None, + axis: None | int | Sequence[int] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Reduce this array by applying `func` along some dimension(s). Parameters @@ -2698,7 +2700,7 @@ def reduce( var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, **kwargs) return self._replace_maybe_drop_dims(var) - def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: + def to_pandas(self) -> DataArray | pd.Series | pd.DataFrame: """Convert this array into a pandas object with the same shape. The type of the returned object depends on the number of DataArray @@ -2726,7 +2728,7 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: return constructor(self.values, *indexes) def to_dataframe( - self, name: Hashable = None, dim_order: List[Hashable] = None + self, name: Hashable = None, dim_order: list[Hashable] = None ) -> pd.DataFrame: """Convert this array and its coordinates into a tidy pandas.DataFrame. @@ -2815,7 +2817,7 @@ def to_masked_array(self, copy: bool = True) -> np.ma.MaskedArray: isnull = pd.isnull(values) return np.ma.MaskedArray(data=values, mask=isnull, copy=copy) - def to_netcdf(self, *args, **kwargs) -> Union[bytes, "Delayed", None]: + def to_netcdf(self, *args, **kwargs) -> bytes | Delayed | None: """Write DataArray contents to a netCDF file. All parameters are passed directly to :py:meth:`xarray.Dataset.to_netcdf`. @@ -2874,26 +2876,8 @@ def to_dict(self, data: bool = True) -> dict: return d @classmethod - def from_dict(cls, d: dict) -> "DataArray": - """ - Convert a dictionary into an xarray.DataArray - - Input dict can take several forms: - - .. code:: python - - d = {"dims": "t", "data": x} - - d = { - "coords": {"t": {"dims": "t", "data": t, "attrs": {"units": "s"}}}, - "attrs": {"title": "air temperature"}, - "dims": "t", - "data": x, - "name": "a", - } - - where "t" is the name of the dimension, "a" is the name of the array, - and x and t are lists, numpy.arrays, or pandas objects. 
+ def from_dict(cls, d: dict) -> DataArray: + """Convert a dictionary into an xarray.DataArray Parameters ---------- @@ -2908,6 +2892,33 @@ def from_dict(cls, d: dict) -> "DataArray": -------- DataArray.to_dict Dataset.from_dict + + Examples + -------- + >>> d = {"dims": "t", "data": [1, 2, 3]} + >>> da = xr.DataArray.from_dict(d) + >>> da + + array([1, 2, 3]) + Dimensions without coordinates: t + + >>> d = { + ... "coords": { + ... "t": {"dims": "t", "data": [0, 1, 2], "attrs": {"units": "s"}} + ... }, + ... "attrs": {"title": "air temperature"}, + ... "dims": "t", + ... "data": [10, 20, 30], + ... "name": "a", + ... } + >>> da = xr.DataArray.from_dict(d) + >>> da + + array([10, 20, 30]) + Coordinates: + * t (t) int64 0 1 2 + Attributes: + title: air temperature """ coords = None if "coords" in d: @@ -2930,7 +2941,7 @@ def from_dict(cls, d: dict) -> "DataArray": return obj @classmethod - def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": + def from_series(cls, series: pd.Series, sparse: bool = False) -> DataArray: """Convert a pandas.Series into an xarray.DataArray. If the series's index is a MultiIndex, it will be expanded into a @@ -2952,33 +2963,33 @@ def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": result.name = series.name return result - def to_cdms2(self) -> "cdms2_Variable": + def to_cdms2(self) -> cdms2_Variable: """Convert this array into a cdms2.Variable""" from ..convert import to_cdms2 return to_cdms2(self) @classmethod - def from_cdms2(cls, variable: "cdms2_Variable") -> "DataArray": + def from_cdms2(cls, variable: cdms2_Variable) -> DataArray: """Convert a cdms2.Variable into an xarray.DataArray""" from ..convert import from_cdms2 return from_cdms2(variable) - def to_iris(self) -> "iris_Cube": + def to_iris(self) -> iris_Cube: """Convert this array into a iris.cube.Cube""" from ..convert import to_iris return to_iris(self) @classmethod - def from_iris(cls, cube: "iris_Cube") -> "DataArray": + def from_iris(cls, cube: iris_Cube) -> DataArray: """Convert a iris.cube.Cube into an xarray.DataArray""" from ..convert import from_iris return from_iris(cube) - def _all_compat(self, other: "DataArray", compat_str: str) -> bool: + def _all_compat(self, other: DataArray, compat_str: str) -> bool: """Helper function for equals, broadcast_equals, and identical""" def compat(x, y): @@ -2988,7 +2999,7 @@ def compat(x, y): self, other ) - def broadcast_equals(self, other: "DataArray") -> bool: + def broadcast_equals(self, other: DataArray) -> bool: """Two DataArrays are broadcast equal if they are equal after broadcasting them against each other such that they have the same dimensions. @@ -3003,7 +3014,7 @@ def broadcast_equals(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def equals(self, other: "DataArray") -> bool: + def equals(self, other: DataArray) -> bool: """True if two DataArrays have the same dimensions, coordinates and values; otherwise False. @@ -3023,7 +3034,7 @@ def equals(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def identical(self, other: "DataArray") -> bool: + def identical(self, other: DataArray) -> bool: """Like equals, but also checks the array name and attributes, and attributes on all coordinates. 
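A minimal sketch of how the comparison methods above differ (illustrative arrays only, not part of the patch):

.. code:: python

    import xarray as xr

    a = xr.DataArray([1, 2, 3], dims="x", name="a", attrs={"units": "m"})
    b = a.rename("b")

    a.equals(b)     # True: dims, coordinates and values match
    a.identical(b)  # False: identical also compares the name and attributes
    a.broadcast_equals(a.expand_dims("y"))  # True once both are broadcast to ("y", "x")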
@@ -3037,7 +3048,7 @@ def identical(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def _result_name(self, other: Any = None) -> Optional[Hashable]: + def _result_name(self, other: Any = None) -> Hashable | None: # use the same naming heuristics as pandas: # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356 other_name = getattr(other, "name", _default) @@ -3046,7 +3057,7 @@ def _result_name(self, other: Any = None) -> Optional[Hashable]: else: return None - def __array_wrap__(self, obj, context=None) -> "DataArray": + def __array_wrap__(self, obj, context=None) -> DataArray: new_var = self.variable.__array_wrap__(obj, context) return self._replace(new_var) @@ -3120,7 +3131,7 @@ def _inplace_binary_op(self, other, f: Callable): ) from exc return self - def _copy_attrs_from(self, other: Union["DataArray", Dataset, Variable]) -> None: + def _copy_attrs_from(self, other: DataArray | Dataset | Variable) -> None: self.attrs = other.attrs plot = utils.UncachedAccessor(_PlotMethods) @@ -3158,7 +3169,7 @@ def _title_for_slice(self, truncate: int = 50) -> str: return title - def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArray": + def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> DataArray: """Calculate the n-th order discrete difference along given axis. Parameters @@ -3209,7 +3220,7 @@ def shift( shifts: Mapping[Any, int] = None, fill_value: Any = dtypes.NA, **shifts_kwargs: int, - ) -> "DataArray": + ) -> DataArray: """Shift this DataArray by an offset along one or more dimensions. Only the data is moved; coordinates stay in place. This is consistent @@ -3259,7 +3270,7 @@ def roll( shifts: Mapping[Hashable, int] = None, roll_coords: bool = False, **shifts_kwargs: int, - ) -> "DataArray": + ) -> DataArray: """Roll this array by an offset along one or more dimensions. Unlike shift, roll treats the given dimensions as periodic, so will not @@ -3304,16 +3315,16 @@ def roll( return self._from_temp_dataset(ds) @property - def real(self) -> "DataArray": + def real(self) -> DataArray: return self._replace(self.variable.real) @property - def imag(self) -> "DataArray": + def imag(self) -> DataArray: return self._replace(self.variable.imag) def dot( - self, other: "DataArray", dims: Union[Hashable, Sequence[Hashable], None] = None - ) -> "DataArray": + self, other: DataArray, dims: Hashable | Sequence[Hashable] | None = None + ) -> DataArray: """Perform dot product of two DataArrays along their shared dims. Equivalent to taking taking tensordot over all shared dims. @@ -3365,9 +3376,9 @@ def dot( def sortby( self, - variables: Union[Hashable, "DataArray", Sequence[Union[Hashable, "DataArray"]]], + variables: Hashable | DataArray | Sequence[Hashable | DataArray], ascending: bool = True, - ) -> "DataArray": + ) -> DataArray: """Sort object by labels or values (along an axis). Sorts the dataarray, either along specified dimensions, @@ -3380,7 +3391,7 @@ def sortby( If multiple sorts along the same dimension is given, numpy's lexsort is performed along that dimension: - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lexsort.html + https://numpy.org/doc/stable/reference/generated/numpy.lexsort.html and the FIRST key in the sequence is used as the primary sort key, followed by the 2nd key, etc. 
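An illustrative sketch of ``dot`` and ``sortby`` as described above (made-up data):

.. code:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(6).reshape(2, 3), dims=("x", "y"))
    w = xr.DataArray([0.25, 0.5, 0.25], dims="y")
    da.dot(w)  # tensordot over the shared dim "y"; the result keeps only "x"

    other = xr.DataArray([30.0, 10.0, 20.0], dims="t", coords={"t": [3, 1, 2]})
    other.sortby("t")  # reorders along "t" by the coordinate labels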
@@ -3429,12 +3440,13 @@ def sortby def quantile( self, - q: Any, - dim: Union[Hashable, Sequence[Hashable], None] = None, - interpolation: str = "linear", + q: ArrayLike, + dim: str | Sequence[Hashable] | None = None, + method: QUANTILE_METHODS = "linear", keep_attrs: bool = None, - skipna: bool = True, - ) -> "DataArray": + skipna: bool = None, + interpolation: QUANTILE_METHODS = None, + ) -> DataArray: """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -3445,24 +3457,43 @@ def quantile( Quantile to compute, which must be between 0 and 1 inclusive. dim : hashable or sequence of hashable, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - - linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - - lower: ``i``. - - higher: ``j``. - - nearest: ``i`` or ``j``, whichever is nearest. - - midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontinuous. The following discontinuous + variations of the default "linear" (7.) option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterisk require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. + keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. skipna : bool, optional - Whether to skip missing values when aggregating. + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -3508,20 +3539,27 @@ def quantile( Coordinates: * y (y) float64 1.0 1.5 2.0 2.5 * quantile (quantile) float64 0.0 0.5 1.0 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ ds = self._to_temp_dataset().quantile( q, dim=dim, keep_attrs=keep_attrs, - interpolation=interpolation, + method=method, skipna=skipna, + interpolation=interpolation, ) return self._from_temp_dataset(ds) def rank( self, dim: Hashable, pct: bool = False, keep_attrs: bool = None - ) -> "DataArray": + ) -> DataArray: """Ranks the data.
Equal values are assigned a rank that is the average of the ranks that @@ -3562,7 +3600,7 @@ def rank( def differentiate( self, coord: Hashable, edge_order: int = 1, datetime_unit: str = None - ) -> "DataArray": + ) -> DataArray: """ Differentiate the array with the second order accurate central differences. @@ -3621,9 +3659,9 @@ def differentiate( def integrate( self, - coord: Union[Hashable, Sequence[Hashable]] = None, + coord: Hashable | Sequence[Hashable] = None, datetime_unit: str = None, - ) -> "DataArray": + ) -> DataArray: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -3675,9 +3713,9 @@ def integrate( def cumulative_integrate( self, - coord: Union[Hashable, Sequence[Hashable]] = None, + coord: Hashable | Sequence[Hashable] = None, datetime_unit: str = None, - ) -> "DataArray": + ) -> DataArray: """Integrate cumulatively along the given coordinate using the trapezoidal rule. .. note:: @@ -3735,7 +3773,7 @@ def cumulative_integrate( ds = self._to_temp_dataset().cumulative_integrate(coord, datetime_unit) return self._from_temp_dataset(ds) - def unify_chunks(self) -> "DataArray": + def unify_chunks(self) -> DataArray: """Unify chunk size along all chunked dimensions of this DataArray. Returns @@ -3753,8 +3791,8 @@ def map_blocks( self, func: Callable[..., T_Xarray], args: Sequence[Any] = (), - kwargs: Mapping[str, Any] = None, - template: Union["DataArray", "Dataset"] = None, + kwargs: Mapping[str, Any] | None = None, + template: DataArray | Dataset | None = None, ) -> T_Xarray: """ Apply a function to each block of this DataArray. @@ -3857,9 +3895,9 @@ def polyfit( self, dim: Hashable, deg: int, - skipna: bool = None, - rcond: float = None, - w: Union[Hashable, Any] = None, + skipna: bool | None = None, + rcond: float | None = None, + w: Hashable | Any | None = None, full: bool = False, cov: bool = False, ): @@ -3920,16 +3958,18 @@ def polyfit( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] | None = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - reflect_type: str = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]]) + | None = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, + reflect_type: str | None = None, **pad_width_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Pad this array along one or more dimensions. .. warning:: @@ -4088,7 +4128,7 @@ def idxmin( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "DataArray": + ) -> DataArray: """Return the coordinate label of the minimum value along a dimension. Returns a new `DataArray` named after the dimension with the values of @@ -4184,7 +4224,7 @@ def idxmax( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "DataArray": + ) -> DataArray: """Return the coordinate label of the maximum value along a dimension. 
Returns a new `DataArray` named after the dimension with the values of @@ -4276,11 +4316,11 @@ def idxmax( def argmin( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the minimum of the DataArray over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of DataArrays, @@ -4379,11 +4419,11 @@ def argmin( def argmax( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the maximum of the DataArray over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of DataArrays, @@ -4487,7 +4527,7 @@ def query( engine: str = None, missing_dims: str = "raise", **queries_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new data array indexed along the specified dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the values in the array. @@ -4557,14 +4597,14 @@ def query( def curvefit( self, - coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], + coords: str | DataArray | Iterable[str | DataArray], func: Callable[..., Any], - reduce_dims: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Hashable | Iterable[Hashable] = None, skipna: bool = True, - p0: Dict[str, Any] = None, - bounds: Dict[str, Any] = None, + p0: dict[str, Any] = None, + bounds: dict[str, Any] = None, param_names: Sequence[str] = None, - kwargs: Dict[str, Any] = None, + kwargs: dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. @@ -4635,17 +4675,15 @@ def curvefit( def drop_duplicates( self, - dim: Hashable, - keep: Union[ - str, - bool, - ] = "first", + dim: Hashable | Iterable[Hashable] | ..., + keep: Literal["first", "last"] | Literal[False] = "first", ): """Returns a new DataArray with duplicate dimension values removed. Parameters ---------- - dim : dimension label, optional + dim : dimension label or labels + Pass `...` to drop duplicates along all dimensions. keep : {"first", "last", False}, default: "first" Determines which duplicates (if any) to keep. - ``"first"`` : Drop duplicates except for the first occurrence. @@ -4655,20 +4693,22 @@ def drop_duplicates( Returns ------- DataArray + + See Also + -------- + Dataset.drop_duplicates """ - if dim not in self.dims: - raise ValueError(f"'{dim}' not found in dimensions") - indexes = {dim: ~self.get_index(dim).duplicated(keep=keep)} - return self.isel(indexes) + deduplicated = self._to_temp_dataset().drop_duplicates(dim, keep=keep) + return self._from_temp_dataset(deduplicated) def convert_calendar( self, calendar: str, dim: str = "time", - align_on: Optional[str] = None, - missing: Optional[Any] = None, - use_cftime: Optional[bool] = None, - ) -> "DataArray": + align_on: str | None = None, + missing: Any | None = None, + use_cftime: bool | None = None, + ) -> DataArray: """Convert the DataArray to another calendar. 
Only converts the individual timestamps, does not modify any data except @@ -4786,9 +4826,9 @@ def convert_calendar( def interp_calendar( self, - target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + target: pd.DatetimeIndex | CFTimeIndex | DataArray, dim: str = "time", - ) -> "DataArray": + ) -> DataArray: """Interpolates the DataArray to another calendar based on decimal year measure. Each timestamp in `source` and `target` are first converted to their decimal diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 568e77cad52..dd7807c2e7c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import copy import datetime import inspect @@ -14,18 +16,13 @@ Callable, Collection, DefaultDict, - Dict, Hashable, Iterable, Iterator, - List, + Literal, Mapping, MutableMapping, - Optional, Sequence, - Set, - Tuple, - Union, cast, overload, ) @@ -81,6 +78,7 @@ merge_data_and_coords, ) from .missing import get_clean_interp_index +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import is_duck_dask_array, sparse_array_type from .utils import ( @@ -106,12 +104,6 @@ broadcast_variables, ) -# TODO: Remove this check once python 3.7 is not supported: -if sys.version_info >= (3, 8): - from typing import Literal -else: - from typing_extensions import Literal - if TYPE_CHECKING: from ..backends import AbstractDataStore, ZarrStore from .dataarray import DataArray @@ -145,7 +137,7 @@ def _get_virtual_variable( variables, key: Hashable, level_vars: Mapping = None, dim_sizes: Mapping = None -) -> Tuple[Hashable, Hashable, Variable]: +) -> tuple[Hashable, Hashable, Variable]: """Get a virtual variable (e.g., 'time.year' or a MultiIndex level) from a dict of xarray.Variable objects (if possible) """ @@ -163,7 +155,7 @@ def _get_virtual_variable( raise KeyError(key) split_key = key.split(".", 1) - var_name: Optional[str] + var_name: str | None if len(split_key) == 2: ref_name, var_name = split_key elif len(split_key) == 1: @@ -191,13 +183,13 @@ def _get_virtual_variable( return ref_name, var_name, virtual_var -def calculate_dimensions(variables: Mapping[Any, Variable]) -> Dict[Hashable, int]: +def calculate_dimensions(variables: Mapping[Any, Variable]) -> dict[Hashable, int]: """Calculate the dimensions corresponding to a set of variables. Returns dictionary mapping from dimension names to sizes. Raises ValueError if any of the dimension sizes conflict. """ - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} last_used = {} scalar_vars = {k for k, v in variables.items() if not v.dims} for k, var in variables.items(): @@ -218,28 +210,28 @@ def calculate_dimensions(variables: Mapping[Any, Variable]) -> Dict[Hashable, in def merge_indexes( - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]], + indexes: Mapping[Any, Hashable | Sequence[Hashable]], variables: Mapping[Any, Variable], - coord_names: Set[Hashable], + coord_names: set[Hashable], append: bool = False, -) -> Tuple[Dict[Hashable, Variable], Set[Hashable]]: +) -> tuple[dict[Hashable, Variable], set[Hashable]]: """Merge variables into multi-indexes. Not public API. Used in Dataset and DataArray set_index methods. 
""" - vars_to_replace: Dict[Hashable, Variable] = {} - vars_to_remove: List[Hashable] = [] - dims_to_replace: Dict[Hashable, Hashable] = {} + vars_to_replace: dict[Hashable, Variable] = {} + vars_to_remove: list[Hashable] = [] + dims_to_replace: dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): if isinstance(var_names, str) or not isinstance(var_names, Sequence): var_names = [var_names] - names: List[Hashable] = [] - codes: List[List[int]] = [] - levels: List[List[int]] = [] + names: list[Hashable] = [] + codes: list[list[int]] = [] + levels: list[list[int]] = [] current_index_variable = variables.get(dim) for n in var_names: @@ -302,12 +294,12 @@ def merge_indexes( def split_indexes( - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], variables: Mapping[Any, Variable], - coord_names: Set[Hashable], + coord_names: set[Hashable], level_coords: Mapping[Any, Hashable], drop: bool = False, -) -> Tuple[Dict[Hashable, Variable], Set[Hashable]]: +) -> tuple[dict[Hashable, Variable], set[Hashable]]: """Extract (multi-)indexes (levels) as variables. Not public API. Used in Dataset and DataArray reset_index @@ -316,7 +308,7 @@ def split_indexes( if isinstance(dims_or_levels, str) or not isinstance(dims_or_levels, Sequence): dims_or_levels = [dims_or_levels] - dim_levels: DefaultDict[Any, List[Hashable]] = defaultdict(list) + dim_levels: DefaultDict[Any, list[Hashable]] = defaultdict(list) dims = [] for k in dims_or_levels: if k in level_coords: @@ -325,7 +317,7 @@ def split_indexes( dims.append(k) vars_to_replace = {} - vars_to_create: Dict[Hashable, Variable] = {} + vars_to_create: dict[Hashable, Variable] = {} vars_to_remove = [] for d in dims: @@ -388,7 +380,7 @@ def _check_chunks_compatibility(var, chunks, preferred_chunks): def _get_chunk(var, chunks): - # chunks need to be explicity computed to take correctly into accout + # chunks need to be explicitly computed to take correctly into account # backend preferred chunking import dask.array as da @@ -448,7 +440,7 @@ def _maybe_chunk( return var -def as_dataset(obj: Any) -> "Dataset": +def as_dataset(obj: Any) -> Dataset: """Cast the given object to a Dataset. Handles Datasets, DataArrays and dictionaries of variables. 
A new Dataset @@ -521,7 +513,7 @@ def _initialize_feasible(lb, ub): class DataVariables(Mapping[Any, "DataArray"]): __slots__ = ("_dataset",) - def __init__(self, dataset: "Dataset"): + def __init__(self, dataset: Dataset): self._dataset = dataset def __iter__(self) -> Iterator[Hashable]: @@ -537,7 +529,7 @@ def __len__(self) -> int: def __contains__(self, key: Hashable) -> bool: return key in self._dataset._variables and key not in self._dataset._coord_names - def __getitem__(self, key: Hashable) -> "DataArray": + def __getitem__(self, key: Hashable) -> DataArray: if key not in self._dataset._coord_names: return cast("DataArray", self._dataset[key]) raise KeyError(key) @@ -562,10 +554,10 @@ def _ipython_key_completions_(self): class _LocIndexer: __slots__ = ("dataset",) - def __init__(self, dataset: "Dataset"): + def __init__(self, dataset: Dataset): self.dataset = dataset - def __getitem__(self, key: Mapping[Any, Any]) -> "Dataset": + def __getitem__(self, key: Mapping[Any, Any]) -> Dataset: if not utils.is_dict_like(key): raise TypeError("can only lookup dictionaries from Dataset.loc") return self.dataset.sel(key) @@ -705,14 +697,14 @@ class Dataset(DataWithCoords, DatasetReductions, DatasetArithmetic, Mapping): description: Weather related data. """ - _attrs: Optional[Dict[Hashable, Any]] - _cache: Dict[str, Any] - _coord_names: Set[Hashable] - _dims: Dict[Hashable, int] - _encoding: Optional[Dict[Hashable, Any]] - _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, Index]] - _variables: Dict[Hashable, Variable] + _attrs: dict[Hashable, Any] | None + _cache: dict[str, Any] + _coord_names: set[Hashable] + _dims: dict[Hashable, int] + _encoding: dict[Hashable, Any] | None + _close: Callable[[], None] | None + _indexes: dict[Hashable, Index] | None + _variables: dict[Hashable, Variable] __slots__ = ( "_attrs", @@ -769,7 +761,7 @@ def __init__( self._indexes = indexes @classmethod - def load_store(cls, store, decoder=None) -> "Dataset": + def load_store(cls, store, decoder=None) -> Dataset: """Create a new dataset from the contents of a backends.*DataStore object """ @@ -792,7 +784,7 @@ def variables(self) -> Mapping[Hashable, Variable]: return Frozen(self._variables) @property - def attrs(self) -> Dict[Hashable, Any]: + def attrs(self) -> dict[Hashable, Any]: """Dictionary of global attributes on this dataset""" if self._attrs is None: self._attrs = {} @@ -803,7 +795,7 @@ def attrs(self, value: Mapping[Any, Any]) -> None: self._attrs = dict(value) @property - def encoding(self) -> Dict: + def encoding(self) -> dict: """Dictionary of global encoding attributes on this dataset""" if self._encoding is None: self._encoding = {} @@ -840,7 +832,7 @@ def sizes(self) -> Mapping[Hashable, int]: """ return self.dims - def load(self, **kwargs) -> "Dataset": + def load(self, **kwargs) -> Dataset: """Manually trigger loading and/or computation of this dataset's data from disk or a remote source into memory and return this dataset. Unlike compute, the original dataset is modified and returned. 
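A small, illustrative example of the dictionary-style ``.loc`` indexing and the ``DataVariables`` mapping touched above (hypothetical dataset):

.. code:: python

    import xarray as xr

    ds = xr.Dataset({"t": ("x", [1, 2, 3])}, coords={"x": [10, 20, 30]})
    ds.loc[{"x": 20}]  # label-based lookup, equivalent to ds.sel(x=20)
    ds.data_vars["t"]  # DataVariables maps variable names to DataArray objects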
@@ -914,11 +906,11 @@ def __dask_layers__(self): import dask return sum( - [ + ( v.__dask_layers__() for v in self.variables.values() if dask.is_dask_collection(v) - ], + ), (), ) @@ -940,7 +932,7 @@ def __dask_postcompute__(self): def __dask_postpersist__(self): return self._dask_postpersist, () - def _dask_postcompute(self, results: "Iterable[Variable]") -> "Dataset": + def _dask_postcompute(self, results: Iterable[Variable]) -> Dataset: import dask variables = {} @@ -964,7 +956,7 @@ def _dask_postcompute(self, results: "Iterable[Variable]") -> "Dataset": def _dask_postpersist( self, dsk: Mapping, *, rename: Mapping[str, str] = None - ) -> "Dataset": + ) -> Dataset: from dask import is_dask_collection from dask.highlevelgraph import HighLevelGraph from dask.optimization import cull @@ -1013,7 +1005,7 @@ def _dask_postpersist( self._close, ) - def compute(self, **kwargs) -> "Dataset": + def compute(self, **kwargs) -> Dataset: """Manually trigger loading and/or computation of this dataset's data from disk or a remote source into memory and return a new dataset. Unlike load, the original dataset is left unaltered. @@ -1035,7 +1027,7 @@ def compute(self, **kwargs) -> "Dataset": new = self.copy(deep=False) return new.load(**kwargs) - def _persist_inplace(self, **kwargs) -> "Dataset": + def _persist_inplace(self, **kwargs) -> Dataset: """Persist all Dask arrays in memory""" # access .data to coerce everything to numpy or dask arrays lazy_data = { @@ -1052,7 +1044,7 @@ def _persist_inplace(self, **kwargs) -> "Dataset": return self - def persist(self, **kwargs) -> "Dataset": + def persist(self, **kwargs) -> Dataset: """Trigger computation, keeping data as dask arrays This operation can be used to trigger computation on underlying dask @@ -1076,14 +1068,14 @@ def persist(self, **kwargs) -> "Dataset": @classmethod def _construct_direct( cls, - variables: Dict[Any, Variable], - coord_names: Set[Hashable], - dims: Dict[Any, int] = None, - attrs: Dict = None, - indexes: Dict[Any, Index] = None, - encoding: Dict = None, + variables: dict[Any, Variable], + coord_names: set[Hashable], + dims: dict[Any, int] = None, + attrs: dict = None, + indexes: dict[Any, Index] = None, + encoding: dict = None, close: Callable[[], None] = None, - ) -> "Dataset": + ) -> Dataset: """Shortcut around __init__ for internal use when we want to skip costly validation """ @@ -1101,14 +1093,14 @@ def _construct_direct( def _replace( self, - variables: Dict[Hashable, Variable] = None, - coord_names: Set[Hashable] = None, - dims: Dict[Any, int] = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, Index], None, Default] = _default, - encoding: Union[dict, None, Default] = _default, + variables: dict[Hashable, Variable] = None, + coord_names: set[Hashable] = None, + dims: dict[Any, int] = None, + attrs: dict[Hashable, Any] | None | Default = _default, + indexes: dict[Hashable, Index] | None | Default = _default, + encoding: dict | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Fastpath constructor for internal use. Returns an object with optionally with replaced attributes. 
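A sketch of the lazy-computation methods referenced above; this assumes dask is installed and uses arbitrary values:

.. code:: python

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"t": ("x", np.arange(10.0))}).chunk({"x": 5})  # lazy dask arrays

    computed = ds.compute()   # returns a new, fully loaded dataset; ds stays lazy
    persisted = ds.persist()  # keeps dask arrays but triggers computation in memory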
@@ -1151,12 +1143,12 @@ def _replace( def _replace_with_new_dims( self, - variables: Dict[Hashable, Variable], + variables: dict[Hashable, Variable], coord_names: set = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, Index], None, Default] = _default, + attrs: dict[Hashable, Any] | None | Default = _default, + indexes: dict[Hashable, Index] | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Replace variables with recalculated dimensions.""" dims = calculate_dimensions(variables) return self._replace( @@ -1165,12 +1157,12 @@ def _replace_with_new_dims( def _replace_vars_and_dims( self, - variables: Dict[Hashable, Variable], + variables: dict[Hashable, Variable], coord_names: set = None, - dims: Dict[Hashable, int] = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, + dims: dict[Hashable, int] = None, + attrs: dict[Hashable, Any] | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Deprecated version of _replace_with_new_dims(). Unlike _replace_with_new_dims(), this method always recalculates @@ -1182,7 +1174,7 @@ def _replace_vars_and_dims( variables, coord_names, dims, attrs, indexes=None, inplace=inplace ) - def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": + def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> Dataset: if not indexes: return self @@ -1194,7 +1186,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = self._replace(variables, indexes=new_indexes) # switch from dimension to level names, if necessary - dim_names: Dict[Hashable, str] = {} + dim_names: dict[Hashable, str] = {} for dim, idx in indexes.items(): pd_idx = idx.to_pandas_index() if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim: @@ -1203,7 +1195,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = obj.rename(dim_names) return obj - def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": + def copy(self, deep: bool = False, data: Mapping = None) -> Dataset: """Returns a copy of this dataset. If `deep=True`, a deep copy is made of each of the component variables. @@ -1328,7 +1320,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": return self._replace(variables, attrs=attrs) - def as_numpy(self: "Dataset") -> "Dataset": + def as_numpy(self: Dataset) -> Dataset: """ Coerces wrapped data and coordinates into numpy arrays, returning a Dataset. @@ -1341,11 +1333,11 @@ def as_numpy(self: "Dataset") -> "Dataset": return self._replace(variables=numpy_variables) @property - def _level_coords(self) -> Dict[str, Hashable]: + def _level_coords(self) -> dict[str, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding coordinate name. """ - level_coords: Dict[str, Hashable] = {} + level_coords: dict[str, Hashable] = {} for name, index in self.xindexes.items(): # TODO: benbovy - flexible indexes: update when MultIndex has its own xarray class. pd_index = index.to_pandas_index() @@ -1355,13 +1347,13 @@ def _level_coords(self) -> Dict[str, Hashable]: level_coords.update({lname: dim for lname in level_names}) return level_coords - def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": + def _copy_listed(self, names: Iterable[Hashable]) -> Dataset: """Create a new Dataset with the listed variables from this dataset and the all relevant coordinates. Skips all validation. 
""" - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} coord_names = set() - indexes: Dict[Hashable, Index] = {} + indexes: dict[Hashable, Index] = {} for name in names: try: @@ -1395,7 +1387,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": return self._replace(variables, coord_names, dims, indexes=indexes) - def _construct_dataarray(self, name: Hashable) -> "DataArray": + def _construct_dataarray(self, name: Hashable) -> DataArray: """Construct a DataArray by indexing this dataset""" from .dataarray import DataArray @@ -1408,7 +1400,7 @@ def _construct_dataarray(self, name: Hashable) -> "DataArray": needed_dims = set(variable.dims) - coords: Dict[Hashable, Variable] = {} + coords: dict[Hashable, Variable] = {} # preserve ordering for k in self._variables: if k in self._coord_names and set(self.variables[k].dims) <= needed_dims: @@ -1421,10 +1413,10 @@ def _construct_dataarray(self, name: Hashable) -> "DataArray": return DataArray(variable, coords, name=name, indexes=indexes, fastpath=True) - def __copy__(self) -> "Dataset": + def __copy__(self) -> Dataset: return self.copy(deep=False) - def __deepcopy__(self, memo=None) -> "Dataset": + def __deepcopy__(self, memo=None) -> Dataset: # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) @@ -1483,15 +1475,15 @@ def loc(self) -> _LocIndexer: # FIXME https://github.com/python/mypy/issues/7328 @overload - def __getitem__(self, key: Mapping) -> "Dataset": # type: ignore[misc] + def __getitem__(self, key: Mapping) -> Dataset: # type: ignore[misc] ... @overload - def __getitem__(self, key: Hashable) -> "DataArray": # type: ignore[misc] + def __getitem__(self, key: Hashable) -> DataArray: # type: ignore[misc] ... @overload - def __getitem__(self, key: Any) -> "Dataset": + def __getitem__(self, key: Any) -> Dataset: ... def __getitem__(self, key): @@ -1508,7 +1500,7 @@ def __getitem__(self, key): else: return self._copy_listed(key) - def __setitem__(self, key: Union[Hashable, List[Hashable], Mapping], value) -> None: + def __setitem__(self, key: Hashable | list[Hashable] | Mapping, value) -> None: """Add an array to this dataset. Multiple arrays can be added at the same time, in which case each of the following operations is applied to the respective value. @@ -1538,7 +1530,7 @@ def __setitem__(self, key: Union[Hashable, List[Hashable], Mapping], value) -> N except Exception as e: if processed: raise RuntimeError( - "An error occured while setting values of the" + "An error occurred while setting values of the" f" variable '{name}'. 
The following variables have" f" been successfully updated:\n{processed}" ) from e @@ -1617,7 +1609,7 @@ def _setitem_check(self, key, value): f"Variable '{name}': dimension '{dim}' appears in new values " f"but not in the indexed original data" ) - dims = tuple([dim for dim in var_k.dims if dim in val.dims]) + dims = tuple(dim for dim in var_k.dims if dim in val.dims) if dims != val.dims: raise ValueError( f"Variable '{name}': dimension order differs between" @@ -1648,7 +1640,7 @@ def __delitem__(self, key: Hashable) -> None: # https://github.com/python/mypy/issues/4266 __hash__ = None # type: ignore[assignment] - def _all_compat(self, other: "Dataset", compat_str: str) -> bool: + def _all_compat(self, other: Dataset, compat_str: str) -> bool: """Helper function for equals and identical""" # some stores (e.g., scipy) do not seem to preserve order, so don't @@ -1660,7 +1652,7 @@ def compat(x: Variable, y: Variable) -> bool: self._variables, other._variables, compat=compat ) - def broadcast_equals(self, other: "Dataset") -> bool: + def broadcast_equals(self, other: Dataset) -> bool: """Two Datasets are broadcast equal if they are equal after broadcasting all variables against each other. @@ -1678,7 +1670,7 @@ def broadcast_equals(self, other: "Dataset") -> bool: except (TypeError, AttributeError): return False - def equals(self, other: "Dataset") -> bool: + def equals(self, other: Dataset) -> bool: """Two Datasets are equal if they have matching variables and coordinates, all of which are equal. @@ -1698,7 +1690,7 @@ def equals(self, other: "Dataset") -> bool: except (TypeError, AttributeError): return False - def identical(self, other: "Dataset") -> bool: + def identical(self, other: Dataset) -> bool: """Like equals, but also checks all dataset attributes and the attributes on all variables and coordinates. @@ -1747,7 +1739,7 @@ def data_vars(self) -> DataVariables: """Dictionary of DataArray objects corresponding to data variables""" return DataVariables(self) - def set_coords(self, names: "Union[Hashable, Iterable[Hashable]]") -> "Dataset": + def set_coords(self, names: Hashable | Iterable[Hashable]) -> Dataset: """Given names of one or more variables, set them as coordinates Parameters @@ -1778,9 +1770,9 @@ def set_coords(self, names: "Union[Hashable, Iterable[Hashable]]") -> "Dataset": def reset_coords( self, - names: "Union[Hashable, Iterable[Hashable], None]" = None, + names: Hashable | Iterable[Hashable] | None = None, drop: bool = False, - ) -> "Dataset": + ) -> Dataset: """Given names of coordinates, reset them to become variables Parameters @@ -1816,7 +1808,7 @@ def reset_coords( del obj._variables[name] return obj - def dump_to_store(self, store: "AbstractDataStore", **kwargs) -> None: + def dump_to_store(self, store: AbstractDataStore, **kwargs) -> None: """Store dataset contents to a backends.*DataStore object.""" from ..backends.api import dump_to_store @@ -1835,7 +1827,7 @@ def to_netcdf( unlimited_dims: Iterable[Hashable] = None, compute: bool = True, invalid_netcdf: bool = False, - ) -> Union[bytes, "Delayed", None]: + ) -> bytes | Delayed | None: """Write dataset contents to a netCDF file. Parameters @@ -1901,7 +1893,7 @@ def to_netcdf( invalid_netcdf: bool, default: False Only valid along with ``engine="h5netcdf"``. If True, allow writing hdf5 files which are invalid netcdf as described in - https://github.com/shoyer/h5netcdf. + https://github.com/h5netcdf/h5netcdf. 
""" if encoding is None: encoding = {} @@ -1922,19 +1914,19 @@ def to_netcdf( def to_zarr( self, - store: Union[MutableMapping, str, PathLike] = None, - chunk_store: Union[MutableMapping, str, PathLike] = None, + store: MutableMapping | str | PathLike | None = None, + chunk_store: MutableMapping | str | PathLike | None = None, mode: str = None, synchronizer=None, group: str = None, encoding: Mapping = None, compute: bool = True, - consolidated: Optional[bool] = None, + consolidated: bool | None = None, append_dim: Hashable = None, region: Mapping[str, slice] = None, safe_chunks: bool = True, - storage_options: Dict[str, str] = None, - ) -> "ZarrStore": + storage_options: dict[str, str] = None, + ) -> ZarrStore: """Write dataset contents to a zarr group. Zarr chunks are determined in the following way: @@ -1985,7 +1977,7 @@ def to_zarr( metadata for existing stores (falling back to non-consolidated). append_dim : hashable, optional If set, the dimension along which the data will be appended. All - other dimensions on overriden variables must remain the same size. + other dimensions on overridden variables must remain the same size. region : dict, optional Optional mapping from dimension names to integer slices along dataset dimensions to indicate the region of existing zarr array(s) @@ -2010,7 +2002,7 @@ def to_zarr( Set False to override this restriction; however, data may become corrupted if Zarr arrays are written in parallel. This option may be useful in combination with ``compute=False`` to initialize a Zarr from an existing - Dataset with aribtrary chunk structure. + Dataset with arbitrary chunk structure. storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). @@ -2102,7 +2094,7 @@ def info(self, buf=None) -> None: buf.write("\n".join(lines)) @property - def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: + def chunks(self) -> Mapping[Hashable, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataset's data, or None if the underlying data is not a dask array. @@ -2119,7 +2111,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: return get_chunksizes(self.variables.values()) @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataset's data, or None if the underlying data is not a dask array. @@ -2137,15 +2129,13 @@ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: def chunk( self, - chunks: Union[ - int, - Literal["auto"], - Mapping[Any, Union[None, int, str, Tuple[int, ...]]], - ] = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) + chunks: ( + int | Literal["auto"] | Mapping[Any, None | int | str | tuple[int, ...]] + ) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", token: str = None, lock: bool = False, - ) -> "Dataset": + ) -> Dataset: """Coerce all arrays in this dataset into dask arrays with the given chunks. 
@@ -2204,7 +2194,7 @@ def chunk( def _validate_indexers( self, indexers: Mapping[Any, Any], missing_dims: str = "raise" - ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: + ) -> Iterator[tuple[Hashable, int | slice | np.ndarray | Variable]]: """Here we make sure + indexer has a valid keys + indexer is in a valid data type @@ -2248,7 +2238,7 @@ def _validate_indexers( def _validate_interp_indexers( self, indexers: Mapping[Any, Any] - ) -> Iterator[Tuple[Hashable, Variable]]: + ) -> Iterator[tuple[Hashable, Variable]]: """Variant of _validate_indexers to be used for interpolation""" for k, v in self._validate_indexers(indexers): if isinstance(v, Variable): @@ -2312,7 +2302,7 @@ def isel( drop: bool = False, missing_dims: str = "raise", **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed along the specified dimension(s). @@ -2367,7 +2357,7 @@ def isel( indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) variables = {} - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} coord_names = self._coord_names.copy() indexes = self._indexes.copy() if self._indexes is not None else None @@ -2404,13 +2394,13 @@ def _isel_fancy( *, drop: bool, missing_dims: str = "raise", - ) -> "Dataset": + ) -> Dataset: # Note: we need to preserve the original indexers variable in order to merge the # coords below indexers_list = list(self._validate_indexers(indexers, missing_dims)) - variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, Index] = {} + variables: dict[Hashable, Variable] = {} + indexes: dict[Hashable, Index] = {} for name, var in self.variables.items(): var_indexers = {k: v for k, v in indexers_list if k in var.dims} @@ -2447,7 +2437,7 @@ def sel( tolerance: Number = None, drop: bool = False, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed by tick labels along the specified dimension(s). @@ -2521,9 +2511,9 @@ def sel( def head( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with the first `n` values of each array for the specified dimension(s). @@ -2567,9 +2557,9 @@ def head( def tail( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with the last `n` values of each array for the specified dimension(s). @@ -2616,9 +2606,9 @@ def tail( def thin( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed along every `n`-th value for the specified dimension(s) @@ -2664,8 +2654,8 @@ def thin( return self.isel(indexers_slices) def broadcast_like( - self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None - ) -> "Dataset": + self, other: Dataset | DataArray, exclude: Iterable[Hashable] = None + ) -> Dataset: """Broadcast this DataArray against another Dataset or DataArray. 
This is equivalent to xr.broadcast(other, self)[1] @@ -2689,12 +2679,12 @@ def broadcast_like( def reindex_like( self, - other: Union["Dataset", "DataArray"], + other: Dataset | DataArray, method: str = None, - tolerance: Number = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, - ) -> "Dataset": + ) -> Dataset: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -2719,6 +2709,10 @@ def reindex_like( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -2752,11 +2746,11 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Number = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Conform this object onto a new set of indexes, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -2780,6 +2774,10 @@ def reindex( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like must be the same size as the index and its dtype + must exactly match the index’s type. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -2962,12 +2960,12 @@ def _reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Number = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """ same to _reindex but support sparse option """ @@ -3000,7 +2998,7 @@ def interp( kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", **coords_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Multidimensional interpolation of Dataset. 
Parameters @@ -3182,8 +3180,8 @@ def _validate_interp_indexer(x, new_x): for k, (index, dest) in validated_indexers.items() } - variables: Dict[Hashable, Variable] = {} - to_reindex: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} + to_reindex: dict[Hashable, Variable] = {} for name, var in obj._variables.items(): if name in indexers: continue @@ -3248,12 +3246,12 @@ def _validate_interp_indexer(x, new_x): def interp_like( self, - other: Union["Dataset", "DataArray"], + other: Dataset | DataArray, method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", - ) -> "Dataset": + ) -> Dataset: """Interpolate this object onto the coordinates of another object, filling the out of range values with NaN. @@ -3299,8 +3297,8 @@ def interp_like( kwargs = {} coords = alignment.reindex_like_indexers(self, other) - numeric_coords: Dict[Hashable, pd.Index] = {} - object_coords: Dict[Hashable, pd.Index] = {} + numeric_coords: dict[Hashable, pd.Index] = {} + object_coords: dict[Hashable, pd.Index] = {} for k, v in coords.items(): if v.dtype.kind in "uifcMm": numeric_coords[k] = v @@ -3366,7 +3364,7 @@ def rename( self, name_dict: Mapping[Any, Hashable] = None, **names: Hashable, - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed variables and dimensions. Parameters @@ -3406,7 +3404,7 @@ def rename( def rename_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims: Hashable - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed dimensions only. Parameters @@ -3451,7 +3449,7 @@ def rename_dims( def rename_vars( self, name_dict: Mapping[Any, Hashable] = None, **names: Hashable - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed variables including coordinates Parameters @@ -3489,7 +3487,7 @@ def rename_vars( def swap_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs - ) -> "Dataset": + ) -> Dataset: """Returns a new object with swapped dimensions. Parameters @@ -3569,8 +3567,8 @@ def swap_dims( coord_names = self._coord_names.copy() coord_names.update({dim for dim in dims_dict.values() if dim in self.variables}) - variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, Index] = {} + variables: dict[Hashable, Variable] = {} + indexes: dict[Hashable, Index] = {} for k, v in self.variables.items(): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: @@ -3595,10 +3593,10 @@ def swap_dims( def expand_dims( self, - dim: Union[None, Hashable, Sequence[Hashable], Mapping[Any, Any]] = None, - axis: Union[None, int, Sequence[int]] = None, + dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, + axis: None | int | Sequence[int] = None, **dim_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Return a new object with an additional axis (or axes) inserted at the corresponding position in the array shape. The new object is a view into the underlying array, not a copy. @@ -3668,7 +3666,7 @@ def expand_dims( " variable name.".format(dim=d) ) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} coord_names = self._coord_names.copy() # If dim is a dict, then ensure that the values are either integers # or iterables. 
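A minimal sketch of the renaming and dimension-manipulation methods above (hypothetical variable and coordinate names):

.. code:: python

    import xarray as xr

    ds = xr.Dataset(
        {"t": ("x", [1, 2, 3])},
        coords={"lon": ("x", [10.0, 20.0, 30.0])},
    )
    ds.swap_dims({"x": "lon"})            # "lon" becomes the dimension (and its index)
    ds.rename_vars({"t": "temperature"})  # rename the data variable only
    ds.expand_dims("time")                # prepend a new size-1 dimension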
@@ -3727,10 +3725,10 @@ def expand_dims( def set_index( self, - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, append: bool = False, - **indexes_kwargs: Union[Hashable, Sequence[Hashable]], - ) -> "Dataset": + **indexes_kwargs: Hashable | Sequence[Hashable], + ) -> Dataset: """Set Dataset (multi-)indexes using one or more existing coordinates or variables. @@ -3791,9 +3789,9 @@ def set_index( def reset_index( self, - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], drop: bool = False, - ) -> "Dataset": + ) -> Dataset: """Reset the specified index(es) or multi-index level(s). Parameters @@ -3827,7 +3825,7 @@ def reorder_levels( self, dim_order: Mapping[Any, Sequence[int]] = None, **dim_order_kwargs: Sequence[int], - ) -> "Dataset": + ) -> Dataset: """Rearrange index levels using input order. Parameters @@ -3898,7 +3896,7 @@ def stack( self, dimensions: Mapping[Any, Sequence[Hashable]] = None, **dimensions_kwargs: Sequence[Hashable], - ) -> "Dataset": + ) -> Dataset: """ Stack any number of existing dimensions into a single new dimension. @@ -3938,7 +3936,7 @@ def to_stacked_array( sample_dims: Collection, variable_dim: Hashable = "variable", name: Hashable = None, - ) -> "DataArray": + ) -> DataArray: """Combine variables of differing dimensionality into a DataArray without broadcasting. @@ -4055,13 +4053,11 @@ def ensure_stackable(val): return data_array - def _unstack_once( - self, dim: Hashable, fill_value, sparse: bool = False - ) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value, sparse: bool = False) -> Dataset: index = self.get_index(dim) index = remove_unused_levels_categories(index) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in self.variables.items(): @@ -4089,9 +4085,7 @@ def _unstack_once( variables, coord_names=coord_names, indexes=indexes ) - def _unstack_full_reindex( - self, dim: Hashable, fill_value, sparse: bool - ) -> "Dataset": + def _unstack_full_reindex(self, dim: Hashable, fill_value, sparse: bool) -> Dataset: index = self.get_index(dim) index = remove_unused_levels_categories(index) full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -4107,7 +4101,7 @@ def _unstack_full_reindex( new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in obj.variables.items(): @@ -4131,10 +4125,10 @@ def _unstack_full_reindex( def unstack( self, - dim: Union[Hashable, Iterable[Hashable]] = None, + dim: Hashable | Iterable[Hashable] = None, fill_value: Any = dtypes.NA, sparse: bool = False, - ) -> "Dataset": + ) -> Dataset: """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -4221,7 +4215,7 @@ def unstack( result = result._unstack_once(dim, fill_value, sparse) return result - def update(self, other: "CoercibleMapping") -> "Dataset": + def update(self, other: CoercibleMapping) -> Dataset: """Update this dataset's variables with those from another dataset. Just like :py:meth:`dict.update` this is a in-place operation. 
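As a rough illustration of the stack/unstack round trip discussed above (made-up data):

.. code:: python

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"t": (("x", "y"), np.arange(6).reshape(2, 3))},
        coords={"x": ["a", "b"], "y": [0, 1, 2]},
    )
    stacked = ds.stack(z=("x", "y"))  # "z" becomes a MultiIndex over (x, y)
    stacked.unstack("z")              # recovers the original 2-D layout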
@@ -4262,13 +4256,13 @@ def update(self, other: "CoercibleMapping") -> "Dataset": def merge( self, - other: Union["CoercibleMapping", "DataArray"], - overwrite_vars: Union[Hashable, Iterable[Hashable]] = frozenset(), + other: CoercibleMapping | DataArray, + overwrite_vars: Hashable | Iterable[Hashable] = frozenset(), compat: str = "no_conflicts", join: str = "outer", fill_value: Any = dtypes.NA, combine_attrs: str = "override", - ) -> "Dataset": + ) -> Dataset: """Merge the arrays of two datasets into a single dataset. This method generally does not allow for overriding data, with the @@ -4359,8 +4353,8 @@ def _assert_all_in_dataset( ) def drop_vars( - self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "Dataset": + self, names: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> Dataset: """Drop variables from this dataset. Parameters @@ -4576,8 +4570,8 @@ def drop_isel(self, indexers=None, **indexers_kwargs): return ds def drop_dims( - self, drop_dims: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "Dataset": + self, drop_dims: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> Dataset: """Drop dimensions and associated variables from this dataset. Parameters @@ -4617,7 +4611,7 @@ def transpose( self, *dims: Hashable, missing_dims: str = "raise", - ) -> "Dataset": + ) -> Dataset: """Return a new Dataset object with all array dimensions transposed. Although the order of dimensions on each array will change, the dataset @@ -4723,7 +4717,7 @@ def dropna( return self.isel({dim: mask}) - def fillna(self, value: Any) -> "Dataset": + def fillna(self, value: Any) -> Dataset: """Fill missing values in this object. This operation follows the normal broadcasting and alignment rules that @@ -4808,12 +4802,12 @@ def interpolate_na( dim: Hashable = None, method: str = "linear", limit: int = None, - use_coordinate: Union[bool, Hashable] = True, - max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta - ] = None, + use_coordinate: bool | Hashable = True, + max_gap: ( + int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta + ) = None, **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Fill in NaNs by interpolating according to different methods. Parameters @@ -4936,8 +4930,8 @@ def interpolate_na( ) return new - def ffill(self, dim: Hashable, limit: int = None) -> "Dataset": - """Fill NaN values by propogating values forward + def ffill(self, dim: Hashable, limit: int = None) -> Dataset: + """Fill NaN values by propagating values forward *Requires bottleneck.* @@ -4962,8 +4956,8 @@ def ffill(self, dim: Hashable, limit: int = None) -> "Dataset": new = _apply_over_vars_with_dim(ffill, self, dim=dim, limit=limit) return new - def bfill(self, dim: Hashable, limit: int = None) -> "Dataset": - """Fill NaN values by propogating values backward + def bfill(self, dim: Hashable, limit: int = None) -> Dataset: + """Fill NaN values by propagating values backward *Requires bottleneck.* @@ -4988,7 +4982,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "Dataset": new = _apply_over_vars_with_dim(bfill, self, dim=dim, limit=limit) return new - def combine_first(self, other: "Dataset") -> "Dataset": + def combine_first(self, other: Dataset) -> Dataset: """Combine two Datasets, default to data_vars of self. 
The new coordinates follow the normal broadcasting and alignment rules @@ -5010,13 +5004,13 @@ def combine_first(self, other: "Dataset") -> "Dataset": def reduce( self, func: Callable, - dim: Union[Hashable, Iterable[Hashable]] = None, + dim: Hashable | Iterable[Hashable] = None, *, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Reduce this dataset by applying `func` along some dimension(s). Parameters @@ -5069,7 +5063,7 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} for name, var in self._variables.items(): reduce_dims = [d for d in var.dims if d in dims] if name in self.coords: @@ -5114,7 +5108,7 @@ def map( keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Apply a function to each variable in this dataset Parameters @@ -5124,9 +5118,9 @@ def map( to transform each DataArray `x` in this dataset into another DataArray. keep_attrs : bool, optional - If True, the dataset's attributes (`attrs`) will be copied from - the original object to the new one. If False, the new object will - be returned without attributes. + If True, both the dataset's and variables' attributes (`attrs`) will be + copied from the original objects to the new ones. If False, the new dataset + and variables will be returned without copying the attributes. args : tuple, optional Positional arguments passed on to `func`. **kwargs : Any @@ -5174,7 +5168,7 @@ def apply( keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """ Backward compatible implementation of ``map`` @@ -5191,7 +5185,7 @@ def apply( def assign( self, variables: Mapping[Any, Any] = None, **variables_kwargs: Hashable - ) -> "Dataset": + ) -> Dataset: """Assign new data variables to a Dataset, returning a new object with all the original variables in addition to the new ones. @@ -5316,8 +5310,8 @@ def to_array(self, dim="variable", name=None): ) def _normalize_dim_order( - self, dim_order: List[Hashable] = None - ) -> Dict[Hashable, int]: + self, dim_order: list[Hashable] = None + ) -> dict[Hashable, int]: """ Check the validity of the provided dimensions if any and return the mapping between dimension name and their size. @@ -5345,7 +5339,7 @@ def _normalize_dim_order( return ordered_dims - def to_pandas(self) -> Union[pd.Series, pd.DataFrame]: + def to_pandas(self) -> pd.Series | pd.DataFrame: """Convert this dataset into a pandas object without changing the number of dimensions. The type of the returned object depends on the number of Dataset @@ -5375,7 +5369,7 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]): index = self.coords.to_index([*ordered_dims]) return pd.DataFrame(dict(zip(columns, data)), index=index) - def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: + def to_dataframe(self, dim_order: list[Hashable] = None) -> pd.DataFrame: """Convert this dataset into a pandas.DataFrame. 
Non-index variables in this dataset form the columns of the @@ -5407,7 +5401,7 @@ def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: return self._to_dataframe(ordered_dims=ordered_dims) def _set_sparse_data_from_dataframe( - self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple + self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple ) -> None: from sparse import COO @@ -5439,7 +5433,7 @@ def _set_sparse_data_from_dataframe( self[name] = (dims, data) def _set_numpy_data_from_dataframe( - self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple + self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple ) -> None: if not isinstance(idx, pd.MultiIndex): for name, values in arrays: @@ -5476,7 +5470,7 @@ def _set_numpy_data_from_dataframe( self[name] = (dims, data) @classmethod - def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Dataset": + def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Dataset: """Convert a pandas.DataFrame into an xarray.Dataset Each column will be converted into an independent variable in the @@ -5649,31 +5643,7 @@ def to_dict(self, data=True): @classmethod def from_dict(cls, d): - """ - Convert a dictionary into an xarray.Dataset. - - Input dict can take several forms: - - .. code:: python - - d = { - "t": {"dims": ("t"), "data": t}, - "a": {"dims": ("t"), "data": x}, - "b": {"dims": ("t"), "data": y}, - } - - d = { - "coords": {"t": {"dims": "t", "data": t, "attrs": {"units": "s"}}}, - "attrs": {"title": "air temperature"}, - "dims": "t", - "data_vars": { - "a": {"dims": "t", "data": x}, - "b": {"dims": "t", "data": y}, - }, - } - - where "t" is the name of the dimesion, "a" and "b" are names of data - variables and t, x, and y are lists, numpy.arrays or pandas objects. + """Convert a dictionary into an xarray.Dataset. Parameters ---------- @@ -5690,6 +5660,47 @@ def from_dict(cls, d): -------- Dataset.to_dict DataArray.from_dict + + Examples + -------- + >>> d = { + ... "t": {"dims": ("t"), "data": [0, 1, 2]}, + ... "a": {"dims": ("t"), "data": ["a", "b", "c"]}, + ... "b": {"dims": ("t"), "data": [10, 20, 30]}, + ... } + >>> ds = xr.Dataset.from_dict(d) + >>> ds + + Dimensions: (t: 3) + Coordinates: + * t (t) int64 0 1 2 + Data variables: + a (t) >> d = { + ... "coords": { + ... "t": {"dims": "t", "data": [0, 1, 2], "attrs": {"units": "s"}} + ... }, + ... "attrs": {"title": "air temperature"}, + ... "dims": "t", + ... "data_vars": { + ... "a": {"dims": "t", "data": [10, 20, 30]}, + ... "b": {"dims": "t", "data": ["a", "b", "c"]}, + ... }, + ... } + >>> ds = xr.Dataset.from_dict(d) + >>> ds + + Dimensions: (t: 3) + Coordinates: + * t (t) int64 0 1 2 + Data variables: + a (t) int64 10 20 30 + b (t) "Dataset": + ) -> Dataset: """Shift this dataset by an offset along one or more dimensions. @@ -5982,7 +5993,7 @@ def roll( shifts: Mapping[Hashable, int] = None, roll_coords: bool = False, **shifts_kwargs: int, - ) -> "Dataset": + ) -> Dataset: """Roll this dataset by an offset along one or more dimensions. 
Unlike shift, roll treats the given dimensions as periodic, so will not @@ -6050,7 +6061,7 @@ def roll( variables[k] = var if roll_coords: - indexes: Dict[Hashable, Index] = {} + indexes: dict[Hashable, Index] = {} idx: pd.Index for k, idx in self.xindexes.items(): (dim,) = self.variables[k].dims @@ -6077,7 +6088,7 @@ def sortby(self, variables, ascending=True): If multiple sorts along the same dimension is given, numpy's lexsort is performed along that dimension: - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lexsort.html + https://numpy.org/doc/stable/reference/generated/numpy.lexsort.html and the FIRST key in the sequence is used as the primary sort key, followed by the 2nd key, etc. @@ -6146,12 +6157,13 @@ def sortby(self, variables, ascending=True): def quantile( self, - q, - dim=None, - interpolation="linear", - numeric_only=False, - keep_attrs=None, - skipna=True, + q: ArrayLike, + dim: str | Iterable[Hashable] | None = None, + method: QUANTILE_METHODS = "linear", + numeric_only: bool = False, + keep_attrs: bool = None, + skipna: bool = None, + interpolation: QUANTILE_METHODS = None, ): """Compute the qth quantile of the data along the specified dimension. @@ -6164,18 +6176,34 @@ def quantile( Quantile to compute, which must be between 0 and 1 inclusive. dim : str or sequence of str, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default "linear" (7.) option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for a description. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. + keep_attrs : bool, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -6183,7 +6211,10 @@ def quantile( numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. skipna : bool, optional - Whether to skip missing values when aggregating. + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). 
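A usage sketch, not part of the diff, of the renamed `method` keyword described above; the data is illustrative, and the old `interpolation=` spelling keeps working for now but emits a FutureWarning via the shim added below:

import xarray as xr

ds = xr.Dataset({"a": ("x", [1.0, 2.0, 3.0, 4.0])})
print(ds.quantile(0.25, dim="x", method="linear")["a"].values)  # 1.75
print(ds.quantile(0.25, dim="x", method="lower")["a"].values)   # 1.0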
Returns ------- @@ -6234,17 +6265,37 @@ def quantile( * quantile (quantile) float64 0.0 0.5 1.0 Data variables: a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ + # interpolation renamed to method in version 0.21.0 + # check here and in variable to avoid repeated warnings + if interpolation is not None: + warnings.warn( + "The `interpolation` argument to quantile was renamed to `method`.", + FutureWarning, + ) + + if method != "linear": + raise TypeError("Cannot pass interpolation and method keywords!") + + method = interpolation + + dims: set[Hashable] if isinstance(dim, str): dims = {dim} - elif dim in [None, ...]: + elif dim is None or dim is ...: dims = set(self.dims) else: dims = set(dim) _assert_empty( - [d for d in dims if d not in self.dims], + tuple(d for d in dims if d not in self.dims), "Dataset does not contain the dimensions: %s", ) @@ -6260,15 +6311,10 @@ def quantile( or np.issubdtype(var.dtype, np.number) or var.dtype == np.bool_ ): - if len(reduce_dims) == var.ndim: - # prefer to aggregate over axis=None rather than - # axis=(0, 1) if they will be equivalent, because - # the former is often more efficient - reduce_dims = None variables[name] = var.quantile( q, dim=reduce_dims, - interpolation=interpolation, + method=method, keep_attrs=keep_attrs, skipna=skipna, ) @@ -6402,9 +6448,9 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): def integrate( self, - coord: Union[Hashable, Sequence[Hashable]], + coord: Hashable | Sequence[Hashable], datetime_unit: str = None, - ) -> "Dataset": + ) -> Dataset: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -6518,9 +6564,9 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): def cumulative_integrate( self, - coord: Union[Hashable, Sequence[Hashable]], + coord: Hashable | Sequence[Hashable], datetime_unit: str = None, - ) -> "Dataset": + ) -> Dataset: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -6696,7 +6742,7 @@ def filter_by_attrs(self, **kwargs): selection.append(var_name) return self[selection] - def unify_chunks(self) -> "Dataset": + def unify_chunks(self) -> Dataset: """Unify chunk size along all chunked dimensions of this Dataset. Returns @@ -6712,11 +6758,11 @@ def unify_chunks(self) -> "Dataset": def map_blocks( self, - func: "Callable[..., T_Xarray]", + func: Callable[..., T_Xarray], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, - template: Union["DataArray", "Dataset"] = None, - ) -> "T_Xarray": + template: DataArray | Dataset | None = None, + ) -> T_Xarray: """ Apply a function to each block of this Dataset. @@ -6823,9 +6869,9 @@ def polyfit( deg: int, skipna: bool = None, rcond: float = None, - w: Union[Hashable, Any] = None, + w: Hashable | Any = None, full: bool = False, - cov: Union[bool, str] = False, + cov: bool | str = False, ): """ Least squares polynomial fit. 
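A quick sketch, not part of the diff, of the `integrate` and `cumulative_integrate` methods retyped above; the coordinate values are illustrative:

import xarray as xr

ds = xr.Dataset({"a": ("x", [0.0, 1.0, 2.0])}, coords={"x": [0.0, 0.5, 1.0]})
print(ds.integrate("x")["a"].values)             # 1.0 (trapezoidal rule over "x")
print(ds.cumulative_integrate("x")["a"].values)  # [0.  0.25 1.]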
@@ -6889,7 +6935,7 @@ def polyfit( skipna_da = skipna x = get_clean_interp_index(self, dim, strict=False) - xname = "{}_".format(self[dim].name) + xname = f"{self[dim].name}_" order = int(deg) + 1 lhs = np.vander(x, order) @@ -6906,7 +6952,7 @@ def polyfit( if w.ndim != 1: raise TypeError("Expected a 1-d array for weights.") if w.shape[0] != lhs.shape[0]: - raise TypeError("Expected w and {} to have the same length".format(dim)) + raise TypeError(f"Expected w and {dim} to have the same length") lhs *= w[:, np.newaxis] # Scaling @@ -6943,7 +6989,7 @@ def polyfit( skipna_da = bool(np.any(da.isnull())) dims_to_stack = [dimname for dimname in da.dims if dimname != dim] - stacked_coords: Dict[Hashable, DataArray] = {} + stacked_coords: dict[Hashable, DataArray] = {} if dims_to_stack: stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") rhs = da.transpose(dim, *dims_to_stack).stack( @@ -6969,7 +7015,7 @@ def polyfit( ) if isinstance(name, str): - name = "{}_".format(name) + name = f"{name}_" else: # Thus a ReprObject => polyfit was called on a DataArray name = "" @@ -7013,16 +7059,19 @@ def polyfit( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: ( + int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None + ) = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, reflect_type: str = None, **pad_width_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Pad this dataset along one or more dimensions. .. warning:: @@ -7169,7 +7218,7 @@ def idxmin( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "Dataset": + ) -> Dataset: """Return the coordinate label of the minimum value along a dimension. Returns a new `Dataset` named after the dimension with the values of @@ -7266,7 +7315,7 @@ def idxmax( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "Dataset": + ) -> Dataset: """Return the coordinate label of the maximum value along a dimension. Returns a new `Dataset` named after the dimension with the values of @@ -7479,7 +7528,7 @@ def query( engine: str = None, missing_dims: str = "raise", **queries_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Return a new dataset with each array indexed along the specified dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the data variables in the @@ -7570,14 +7619,14 @@ def query( def curvefit( self, - coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], + coords: str | DataArray | Iterable[str | DataArray], func: Callable[..., Any], - reduce_dims: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Hashable | Iterable[Hashable] = None, skipna: bool = True, - p0: Dict[str, Any] = None, - bounds: Dict[str, Any] = None, + p0: dict[str, Any] = None, + bounds: dict[str, Any] = None, param_names: Sequence[str] = None, - kwargs: Dict[str, Any] = None, + kwargs: dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. 
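A small sketch, not part of the diff, of the `idxmin`/`idxmax` methods retyped above; the data and coordinate labels are illustrative:

import xarray as xr

ds = xr.Dataset({"a": ("x", [3.0, 1.0, 2.0])}, coords={"x": [10, 20, 30]})
print(ds.idxmin(dim="x")["a"].values)  # 20 -- coordinate label of the minimum along "x"
print(ds.idxmax(dim="x")["a"].values)  # 10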
@@ -7743,14 +7792,53 @@ def _wrapper(Y, *coords_, **kwargs): return result + def drop_duplicates( + self, + dim: Hashable | Iterable[Hashable] | ..., + keep: Literal["first", "last"] | Literal[False] = "first", + ): + """Returns a new Dataset with duplicate dimension values removed. + + Parameters + ---------- + dim : dimension label or labels + Pass `...` to drop duplicates along all dimensions. + keep : {"first", "last", False}, default: "first" + Determines which duplicates (if any) to keep. + - ``"first"`` : Drop duplicates except for the first occurrence. + - ``"last"`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. + + Returns + ------- + Dataset + + See Also + -------- + DataArray.drop_duplicates + """ + if isinstance(dim, str): + dims = (dim,) + elif dim is ...: + dims = self.dims + else: + dims = dim + + missing_dims = set(dims) - set(self.dims) + if missing_dims: + raise ValueError(f"'{missing_dims}' not found in dimensions") + + indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims} + return self.isel(indexes) + def convert_calendar( self, calendar: str, dim: str = "time", - align_on: Optional[str] = None, - missing: Optional[Any] = None, - use_cftime: Optional[bool] = None, - ) -> "Dataset": + align_on: str | None = None, + missing: Any | None = None, + use_cftime: bool | None = None, + ) -> Dataset: """Convert the Dataset to another calendar. Only converts the individual timestamps, does not modify any data except @@ -7868,9 +7956,9 @@ def convert_calendar( def interp_calendar( self, - target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + target: pd.DatetimeIndex | CFTimeIndex | DataArray, dim: str = "time", - ) -> "Dataset": + ) -> Dataset: """Interpolates the Dataset to another calendar based on decimal year measure. Each timestamp in `source` and `target` are first converted to their decimal diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index 5f9349051b7..1e87e782fb2 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -34,7 +34,7 @@ def __eq__(self, other): # Pairs of types that, if both found, should be promoted to object dtype # instead of following NumPy's own type-promotion rules. These type promotion # rules match pandas instead. For reference, see the NumPy type hierarchy: -# https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html +# https://numpy.org/doc/stable/reference/arrays.scalars.html PROMOTE_TO_OBJECT = [ {np.number, np.character}, # numpy promotes to character {np.bool_, np.character}, # numpy promotes to character diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 5b0d9a4fcd4..b85d0e1645e 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -20,17 +20,10 @@ from numpy import stack as _stack from numpy import take, tensordot, transpose, unravel_index # noqa from numpy import where as _where -from packaging.version import Version from . 
import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import ( - cupy_array_type, - dask_array_type, - is_duck_dask_array, - sparse_array_type, - sparse_version, -) +from .pycompat import cupy_array_type, dask_array_type, is_duck_dask_array from .utils import is_duck_array try: @@ -174,17 +167,6 @@ def cumulative_trapezoid(y, x, axis): def astype(data, dtype, **kwargs): - if ( - isinstance(data, sparse_array_type) - and sparse_version < Version("0.11.0") - and "casting" in kwargs - ): - warnings.warn( - "The current version of sparse does not support the 'casting' argument. It will be ignored in the call to astype().", - RuntimeWarning, - stacklevel=4, - ) - kwargs.pop("casting") return data.astype(dtype, **kwargs) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c0633064231..2a9f8a27815 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -306,7 +306,7 @@ def summarize_variable( def _summarize_coord_multiindex(coord, col_width, marker): first_col = pretty_print(f" {marker} {coord.name} ", col_width) - return "{}({}) MultiIndex".format(first_col, str(coord.dims[0])) + return f"{first_col}({str(coord.dims[0])}) MultiIndex" def _summarize_coord_levels(coord, col_width, marker="-"): @@ -622,7 +622,7 @@ def array_repr(arr): def dataset_repr(ds): - summary = ["".format(type(ds).__name__)] + summary = [f""] col_width = _calculate_col_width(_get_col_items(ds.variables)) max_rows = OPTIONS["display_max_rows"] diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 072a932b943..36c252f276e 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -266,7 +266,7 @@ def _obj_repr(obj, header_components, sections): def array_repr(arr): dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape)) - obj_type = "xarray.{}".format(type(arr).__name__) + obj_type = f"xarray.{type(arr).__name__}" arr_name = f"'{arr.name}'" if getattr(arr, "name", None) else "" coord_names = list(arr.coords) if hasattr(arr, "coords") else [] @@ -287,7 +287,7 @@ def array_repr(arr): def dataset_repr(ds): - obj_type = "xarray.{}".format(type(ds).__name__) + obj_type = f"xarray.{type(ds).__name__}" header_components = [f"
<div class='xr-obj-type'>{escape(obj_type)}</div>
"] diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c79ef2778b8..c93a9c81b80 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -202,7 +202,7 @@ def _unique_and_monotonic(group): if isinstance(group, _DummyGroup): return True index = safe_cast_to_index(group) - return index.is_unique and index.is_monotonic + return index.is_unique and index.is_monotonic_increasing def _apply_loffset(grouper, result): @@ -353,7 +353,7 @@ def __init__( if grouper is not None: index = safe_cast_to_index(group) - if not index.is_monotonic: + if not index.is_monotonic_increasing: # TODO: sort instead of raising an error raise ValueError("index must be monotonic for resampling") full_index, first_items = self._get_index_and_items(index, grouper) @@ -663,7 +663,13 @@ def fillna(self, value): return ops.fillna(self, value) def quantile( - self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + self, + q, + dim=None, + method="linear", + keep_attrs=None, + skipna=None, + interpolation=None, ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -676,20 +682,39 @@ def quantile( dim : ..., str or sequence of str, optional Dimension(s) over which to apply quantile. Defaults to the grouped dimension. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default "linear" (7.) option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. + skipna : bool, optional - Whether to skip missing values when aggregating. + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -753,6 +778,12 @@ def quantile( * y (y) int64 1 2 Data variables: a (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4 + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 
361-365, 1996 """ if dim is None: dim = self._group_dim @@ -762,9 +793,10 @@ def quantile( shortcut=False, q=q, dim=dim, - interpolation=interpolation, + method=method, keep_attrs=keep_attrs, skipna=skipna, + interpolation=interpolation, ) return out diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 1ded35264f4..b66fbdf6504 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -231,9 +231,14 @@ def query(self, labels, method=None, tolerance=None): ) indexer = self.index.get_loc(label_value) else: - indexer = self.index.get_loc( - label_value, method=method, tolerance=tolerance - ) + if method is not None: + indexer = get_indexer_nd(self.index, label, method, tolerance) + if np.any(indexer < 0): + raise KeyError( + f"not all values found in index {coord_name!r}" + ) + else: + indexer = self.index.get_loc(label_value) elif label.dtype.kind == "b": indexer = label else: @@ -296,7 +301,7 @@ def from_variables(cls, variables: Mapping[Any, "Variable"]): if any([var.ndim != 1 for var in variables.values()]): raise ValueError("PandasMultiIndex only accepts 1-dimensional variables") - dims = set([var.dims for var in variables.values()]) + dims = {var.dims for var in variables.values()} if len(dims) != 1: raise ValueError( "unmatched dimensions for variables " diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c93d797266b..17d026baa59 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -35,7 +35,7 @@ def expanded_indexer(key, ndim): key = (key,) new_key = [] # handling Ellipsis right is a little tricky, see: - # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing + # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: @@ -579,7 +579,7 @@ def as_indexable(array): if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) - raise TypeError("Invalid array type: {}".format(type(array))) + raise TypeError(f"Invalid array type: {type(array)}") def _outer_to_vectorized_indexer(key, shape): @@ -1051,7 +1051,7 @@ def create_mask(indexer, shape, data=None): mask = any(k == -1 for k in indexer.tuple) else: - raise TypeError("unexpected key type: {}".format(type(indexer))) + raise TypeError(f"unexpected key type: {type(indexer)}") return mask @@ -1146,10 +1146,10 @@ def _indexing_array_and_key(self, key): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see - # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). + # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). key = key.tuple + (Ellipsis,) else: - raise TypeError("unexpected key type: {}".format(type(key))) + raise TypeError(f"unexpected key type: {type(key)}") return array, key diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 460e02ae10f..e5407ae79c3 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -4,15 +4,12 @@ TYPE_CHECKING, AbstractSet, Any, - Dict, Hashable, Iterable, - List, Mapping, NamedTuple, Optional, Sequence, - Set, Tuple, Union, ) @@ -66,12 +63,12 @@ def __init__(self, func): self.func = func -def broadcast_dimension_size(variables: List[Variable]) -> Dict[Hashable, int]: +def broadcast_dimension_size(variables: list[Variable]) -> dict[Hashable, int]: """Extract dimension sizes from a dictionary of variables. Raises ValueError if any dimensions have different sizes. 
""" - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} for var in variables: for dim, size in zip(var.dims, var.shape): if dim in dims and size != dims[dim]: @@ -89,7 +86,7 @@ class MergeError(ValueError): def unique_variable( name: Hashable, - variables: List[Variable], + variables: list[Variable], compat: str = "broadcast_equals", equals: bool = None, ) -> Variable: @@ -162,20 +159,18 @@ def unique_variable( def _assert_compat_valid(compat): if compat not in _VALID_COMPAT: - raise ValueError( - "compat={!r} invalid: must be {}".format(compat, set(_VALID_COMPAT)) - ) + raise ValueError(f"compat={compat!r} invalid: must be {set(_VALID_COMPAT)}") MergeElement = Tuple[Variable, Optional[Index]] def merge_collected( - grouped: Dict[Hashable, List[MergeElement]], + grouped: dict[Hashable, list[MergeElement]], prioritized: Mapping[Any, MergeElement] = None, compat: str = "minimal", combine_attrs="override", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge dicts of variables, while resolving conflicts appropriately. Parameters @@ -196,8 +191,8 @@ def merge_collected( _assert_compat_valid(compat) - merged_vars: Dict[Hashable, Variable] = {} - merged_indexes: Dict[Hashable, Index] = {} + merged_vars: dict[Hashable, Variable] = {} + merged_indexes: dict[Hashable, Index] = {} for name, elements_list in grouped.items(): if name in prioritized: @@ -255,8 +250,8 @@ def merge_collected( def collect_variables_and_indexes( - list_of_mappings: List[DatasetLike], -) -> Dict[Hashable, List[MergeElement]]: + list_of_mappings: list[DatasetLike], +) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. Mappings must either be Dataset objects, or have values of one of the @@ -269,7 +264,7 @@ def collect_variables_and_indexes( from .dataarray import DataArray from .dataset import Dataset - grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} + grouped: dict[Hashable, list[tuple[Variable, Index | None]]] = {} def append(name, variable, index): values = grouped.setdefault(name, []) @@ -307,10 +302,10 @@ def append_all(variables, indexes): def collect_from_coordinates( - list_of_coords: "List[Coordinates]", -) -> Dict[Hashable, List[MergeElement]]: + list_of_coords: list[Coordinates], +) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" - grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} + grouped: dict[Hashable, list[tuple[Variable, Index | None]]] = {} for coords in list_of_coords: variables = coords.variables @@ -322,11 +317,11 @@ def collect_from_coordinates( def merge_coordinates_without_align( - objects: "List[Coordinates]", + objects: list[Coordinates], prioritized: Mapping[Any, MergeElement] = None, exclude_dims: AbstractSet = frozenset(), combine_attrs: str = "override", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge variables/indexes from coordinates without automatic alignments. 
This function is used for merging coordinate from pre-existing xarray @@ -335,7 +330,7 @@ def merge_coordinates_without_align( collected = collect_from_coordinates(objects) if exclude_dims: - filtered: Dict[Hashable, List[MergeElement]] = {} + filtered: dict[Hashable, list[MergeElement]] = {} for name, elements in collected.items(): new_elements = [ (variable, index) @@ -351,8 +346,8 @@ def merge_coordinates_without_align( def determine_coords( - list_of_mappings: Iterable["DatasetLike"], -) -> Tuple[Set[Hashable], Set[Hashable]]: + list_of_mappings: Iterable[DatasetLike], +) -> tuple[set[Hashable], set[Hashable]]: """Given a list of dicts with xarray object values, identify coordinates. Parameters @@ -370,8 +365,8 @@ def determine_coords( from .dataarray import DataArray from .dataset import Dataset - coord_names: Set[Hashable] = set() - noncoord_names: Set[Hashable] = set() + coord_names: set[Hashable] = set() + noncoord_names: set[Hashable] = set() for mapping in list_of_mappings: if isinstance(mapping, Dataset): @@ -388,7 +383,7 @@ def determine_coords( return coord_names, noncoord_names -def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["DatasetLike"]: +def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLike]: """Convert pandas values found in a list of labeled objects. Parameters @@ -408,7 +403,7 @@ def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["Dataset out = [] for obj in objects: if isinstance(obj, Dataset): - variables: "DatasetLike" = obj + variables: DatasetLike = obj else: variables = {} if isinstance(obj, PANDAS_TYPES): @@ -422,8 +417,8 @@ def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["Dataset def _get_priority_vars_and_indexes( - objects: List["DatasetLike"], priority_arg: Optional[int], compat: str = "equals" -) -> Dict[Hashable, MergeElement]: + objects: list[DatasetLike], priority_arg: int | None, compat: str = "equals" +) -> dict[Hashable, MergeElement]: """Extract the priority variable from a list of mappings. We need this method because in some cases the priority argument itself @@ -448,20 +443,20 @@ def _get_priority_vars_and_indexes( collected = collect_variables_and_indexes([objects[priority_arg]]) variables, indexes = merge_collected(collected, compat=compat) - grouped: Dict[Hashable, MergeElement] = {} + grouped: dict[Hashable, MergeElement] = {} for name, variable in variables.items(): grouped[name] = (variable, indexes.get(name)) return grouped def merge_coords( - objects: Iterable["CoercibleMapping"], + objects: Iterable[CoercibleMapping], compat: str = "minimal", join: str = "outer", - priority_arg: Optional[int] = None, - indexes: Optional[Mapping[Any, Index]] = None, + priority_arg: int | None = None, + indexes: Mapping[Any, Index] | None = None, fill_value: object = dtypes.NA, -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge coordinate variables. See merge_core below for argument descriptions. 
This works similarly to @@ -568,21 +563,21 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): class _MergeResult(NamedTuple): - variables: Dict[Hashable, Variable] - coord_names: Set[Hashable] - dims: Dict[Hashable, int] - indexes: Dict[Hashable, pd.Index] - attrs: Dict[Hashable, Any] + variables: dict[Hashable, Variable] + coord_names: set[Hashable] + dims: dict[Hashable, int] + indexes: dict[Hashable, pd.Index] + attrs: dict[Hashable, Any] def merge_core( - objects: Iterable["CoercibleMapping"], + objects: Iterable[CoercibleMapping], compat: str = "broadcast_equals", join: str = "outer", - combine_attrs: Optional[str] = "override", - priority_arg: Optional[int] = None, - explicit_coords: Optional[Sequence] = None, - indexes: Optional[Mapping[Any, Any]] = None, + combine_attrs: str | None = "override", + priority_arg: int | None = None, + explicit_coords: Sequence | None = None, + indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, ) -> _MergeResult: """Core logic for merging labeled objects. @@ -592,7 +587,7 @@ def merge_core( Parameters ---------- objects : list of mapping - All values must be convertable to labeled arrays. + All values must be convertible to labeled arrays. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional Compatibility checks to use when merging variables. join : {"outer", "inner", "left", "right"}, optional @@ -667,12 +662,12 @@ def merge_core( def merge( - objects: Iterable[Union["DataArray", "CoercibleMapping"]], + objects: Iterable[DataArray | CoercibleMapping], compat: str = "no_conflicts", join: str = "outer", fill_value: object = dtypes.NA, combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: """Merge any number of xarray objects into a single Dataset as variables. Parameters @@ -913,9 +908,9 @@ def merge( def dataset_merge_method( - dataset: "Dataset", - other: "CoercibleMapping", - overwrite_vars: Union[Hashable, Iterable[Hashable]], + dataset: Dataset, + other: CoercibleMapping, + overwrite_vars: Hashable | Iterable[Hashable], compat: str, join: str, fill_value: Any, @@ -938,8 +933,8 @@ def dataset_merge_method( objs = [dataset, other] priority_arg = 1 else: - other_overwrite: Dict[Hashable, CoercibleValue] = {} - other_no_overwrite: Dict[Hashable, CoercibleValue] = {} + other_overwrite: dict[Hashable, CoercibleValue] = {} + other_no_overwrite: dict[Hashable, CoercibleValue] = {} for k, v in other.items(): if k in overwrite_vars: other_overwrite[k] = v @@ -958,9 +953,7 @@ def dataset_merge_method( ) -def dataset_update_method( - dataset: "Dataset", other: "CoercibleMapping" -) -> _MergeResult: +def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeResult: """Guts of the Dataset.update method. 
This drops a duplicated coordinates from `other` if `other` is not an diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 6749e5294f0..c1776145e21 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -266,7 +266,7 @@ def get_clean_interp_index( index.name = dim if strict: - if not index.is_monotonic: + if not index.is_monotonic_increasing: raise ValueError(f"Index {index.name!r} must be monotonically increasing") if not index.is_unique: @@ -386,9 +386,9 @@ def func_interpolate_na(interpolator, y, x, **kwargs): nans = pd.isnull(y) nonans = ~nans - # fast track for no-nans and all-nans cases + # fast track for no-nans, all nan but one, and all-nans cases n_nans = nans.sum() - if n_nans == 0 or n_nans == len(y): + if n_nans == 0 or n_nans >= len(y) - 1: return y f = interpolator(x[nonans], y[nonans], **kwargs) @@ -564,9 +564,8 @@ def _localize(var, indexes_coords): minval = np.nanmin(new_x.values) maxval = np.nanmax(new_x.values) index = x.to_index() - imin = index.get_loc(minval, method="nearest") - imax = index.get_loc(maxval, method="nearest") - + imin = index.get_indexer([minval], method="nearest").item() + imax = index.get_indexer([maxval], method="nearest").item() indexes[dim] = slice(max(imin - 2, 0), imax + 2) indexes_coords[dim] = (x[indexes[dim]], new_x) return var.isel(**indexes), indexes_coords @@ -574,7 +573,7 @@ def _localize(var, indexes_coords): def _floatize_x(x, new_x): """Make x and new_x float. - This is particulary useful for datetime dtype. + This is particularly useful for datetime dtype. x, new_x: tuple of np.ndarray """ x = list(x) @@ -625,7 +624,7 @@ def interp(var, indexes_coords, method, **kwargs): kwargs["bounds_error"] = kwargs.get("bounds_error", False) result = var - # decompose the interpolation into a succession of independant interpolation + # decompose the interpolation into a succession of independent interpolation for indexes_coords in decompose_interp(indexes_coords): var = result @@ -721,7 +720,7 @@ def interp_func(var, x, new_x, method, kwargs): _, rechunked = da.unify_chunks(*args) - args = tuple([elem for pair in zip(rechunked, args[1::2]) for elem in pair]) + args = tuple(elem for pair in zip(rechunked, args[1::2]) for elem in pair) new_x = rechunked[1 + (len(rechunked) - 1) // 2 :] @@ -732,7 +731,7 @@ def interp_func(var, x, new_x, method, kwargs): for i in range(new_x[0].ndim) } - # if usefull, re-use localize for each chunk of new_x + # if useful, re-use localize for each chunk of new_x localize = (method in ["linear", "nearest"]) and (new_x[0].chunks is not None) # scipy.interpolate.interp1d always forces to float. @@ -826,7 +825,7 @@ def _dask_aware_interpnd(var, *coords, interp_func, interp_kwargs, localize=True def decompose_interp(indexes_coords): - """Decompose the interpolation into a succession of independant interpolation keeping the order""" + """Decompose the interpolation into a succession of independent interpolation keeping the order""" dest_dims = [ dest[1].dims if dest[1].ndim > 0 else [dim] diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 1eaa2728e8a..b5b98052fe9 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -28,7 +28,7 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
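The `_localize` change above replaces the deprecated `pd.Index.get_loc(value, method=...)` call with `get_indexer([...], method=...)`; a standalone sketch of that replacement pattern, with illustrative index values:

import pandas as pd

idx = pd.Index([0.0, 1.0, 2.5, 4.0])
# old spelling, deprecated in recent pandas: idx.get_loc(2.4, method="nearest")
pos = idx.get_indexer([2.4], method="nearest").item()
print(pos, idx[pos])  # 2 2.5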
-from typing import TYPE_CHECKING, Any, Sequence, TypeVar, Union +from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar, Union import numpy as np from packaging.version import Version @@ -169,3 +169,29 @@ def sliding_window_view( return as_strided( x, strides=out_strides, shape=out_shape, subok=subok, writeable=writeable ) + + +if Version(np.__version__) >= Version("1.22.0"): + QUANTILE_METHODS = Literal[ + "inverted_cdf", + "averaged_inverted_cdf", + "closest_observation", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "linear", + "median_unbiased", + "normal_unbiased", + "lower", + "higher", + "midpoint", + "nearest", + ] +else: + QUANTILE_METHODS = Literal[ # type: ignore[misc] + "linear", + "lower", + "higher", + "midpoint", + "nearest", + ] diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 3e0f550dd30..1feb97c5aa4 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -103,7 +103,7 @@ def _advanced_indexer_subspaces(key): # Nothing to reorder: dimensions on the indexing result are already # ordered like vindex. See NumPy's rule for "Combining advanced and # basic indexing": - # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#combining-advanced-and-basic-indexing + # https://numpy.org/doc/stable/reference/arrays.indexing.html#combining-advanced-and-basic-indexing return (), () non_slices = [k for k in key if not isinstance(k, slice)] diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index aad1d285377..3f6bb34a36e 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -8,14 +8,10 @@ Any, Callable, DefaultDict, - Dict, Hashable, Iterable, - List, Mapping, Sequence, - Tuple, - Union, ) import numpy as np @@ -53,7 +49,7 @@ def assert_chunks_compatible(a: Dataset, b: Dataset): def check_result_variables( - result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str + result: DataArray | Dataset, expected: Mapping[str, Any], kind: str ): if kind == "coords": @@ -126,7 +122,7 @@ def make_meta(obj): def infer_template( - func: Callable[..., T_Xarray], obj: Union[DataArray, Dataset], *args, **kwargs + func: Callable[..., T_Xarray], obj: DataArray | Dataset, *args, **kwargs ) -> T_Xarray: """Infer return object by running the function on meta objects.""" meta_args = [make_meta(arg) for arg in (obj,) + args] @@ -148,7 +144,7 @@ def infer_template( return template -def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]: +def make_dict(x: DataArray | Dataset) -> dict[Hashable, Any]: """Map variable name to numpy(-like) data (Dataset.to_dict() is too complicated). """ @@ -167,10 +163,10 @@ def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping def map_blocks( func: Callable[..., T_Xarray], - obj: Union[DataArray, Dataset], + obj: DataArray | Dataset, args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, - template: Union[DataArray, Dataset] = None, + template: DataArray | Dataset | None = None, ) -> T_Xarray: """Apply a function to each block of a DataArray or Dataset. @@ -271,7 +267,7 @@ def map_blocks( def _wrapper( func: Callable, - args: List, + args: list, kwargs: dict, arg_is_array: Iterable[bool], expected: dict, @@ -415,8 +411,8 @@ def _wrapper( # for each variable in the dataset, which is the result of the # func applied to the values. 
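The version gate above exists because numpy 1.22 renamed the `interpolation` keyword of `np.quantile` to `method` and added the Hyndman-Fan methods; a standalone sketch of the same gating pattern, which `Variable.quantile` applies further down:

import numpy as np
from packaging.version import Version

if Version(np.__version__) >= Version("1.22.0"):
    kwargs = {"method": "linear"}
else:
    kwargs = {"interpolation": "linear"}

print(np.quantile([1.0, 2.0, 3.0, 4.0], 0.5, **kwargs))  # 2.5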
- graph: Dict[Any, Any] = {} - new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict) + graph: dict[Any, Any] = {} + new_layers: DefaultDict[str, dict[Any, Any]] = collections.defaultdict(dict) gname = "{}-{}".format( dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs) ) @@ -516,14 +512,14 @@ def subset_dataset_to_block( graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected) # mapping from variable name to dask graph key - var_key_map: Dict[Hashable, str] = {} + var_key_map: dict[Hashable, str] = {} for name, variable in template.variables.items(): if name in indexes: continue gname_l = f"{name}-{gname}" var_key_map[name] = gname_l - key: Tuple[Any, ...] = (gname_l,) + key: tuple[Any, ...] = (gname_l,) for dim in variable.dims: if dim in chunk_index: key += (chunk_index[dim],) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 0cac9f2b129..f2ac9d979ae 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -33,7 +33,7 @@ Returns ------- reduced : same type as caller - New object with `{name}` applied along its rolling dimnension. + New object with `{name}` applied along its rolling dimension. """ @@ -175,7 +175,7 @@ def _mapping_to_list( return [arg] else: raise ValueError( - "Mapping argument is necessary for {}d-rolling.".format(len(self.dim)) + f"Mapping argument is necessary for {len(self.dim)}d-rolling." ) def _get_keep_attrs(self, keep_attrs): @@ -767,7 +767,7 @@ def __init__(self, obj, windows, boundary, side, coord_func): exponential window along (e.g. `time`) to the size of the moving window. boundary : 'exact' | 'trim' | 'pad' If 'exact', a ValueError will be raised if dimension size is not a - multiple of window size. If 'trim', the excess indexes are trimed. + multiple of window size. If 'trim', the excess indexes are trimmed. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' coord_func : mapping from coordinate name to func. 
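A usage sketch, not part of the diff, of the coarsen boundary="trim" behaviour whose docstring is corrected above; the data is illustrative:

import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("time", np.arange(7.0))})
# 7 is not a multiple of the window, so boundary="exact" would raise;
# "trim" drops the excess element at the end instead.
print(ds.coarsen(time=3, boundary="trim").mean()["a"].values)  # [1. 4.]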
@@ -803,7 +803,7 @@ def __repr__(self): """provide a nice str repr of our coarsen object""" attrs = [ - "{k}->{v}".format(k=k, v=getattr(self, k)) + f"{k}->{getattr(self, k)}" for k in self._attributes if getattr(self, k, None) is not None ] diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 7a8b0be9bd4..9fd097cd4dc 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Generic, Mapping, Union +from typing import Any, Generic, Mapping import numpy as np from packaging.version import Version @@ -101,7 +101,7 @@ class RollingExp(Generic[T_Xarray]): def __init__( self, obj: T_Xarray, - windows: Mapping[Any, Union[int, float]], + windows: Mapping[Any, int | float], window_type: str = "span", ): self.obj: T_Xarray = obj diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 17727830fb6..06257cef8da 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1,4 +1,6 @@ """Internal utilities; not for external use""" +from __future__ import annotations + import contextlib import functools import io @@ -14,18 +16,14 @@ Callable, Collection, Container, - Dict, Hashable, Iterable, Iterator, Mapping, MutableMapping, MutableSet, - Optional, Sequence, - Tuple, TypeVar, - Union, cast, ) @@ -188,7 +186,7 @@ def list_equiv(first, second): return equiv -def peek_at(iterable: Iterable[T]) -> Tuple[T, Iterator[T]]: +def peek_at(iterable: Iterable[T]) -> tuple[T, Iterator[T]]: """Returns the first value from iterable, as well as a new iterator with the same content as the original iterable """ @@ -273,7 +271,7 @@ def is_duck_array(value: Any) -> bool: def either_dict_or_kwargs( - pos_kwargs: Optional[Mapping[Any, T]], + pos_kwargs: Mapping[Any, T] | None, kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: @@ -468,7 +466,7 @@ def __contains__(self, key: object) -> bool: return key in self.mapping def __repr__(self) -> str: - return "{}({!r})".format(type(self).__name__, self.mapping) + return f"{type(self).__name__}({self.mapping!r})" def FrozenDict(*args, **kwargs) -> Frozen: @@ -511,7 +509,7 @@ class OrderedSet(MutableSet[T]): a dict. Note that, unlike in an OrderedDict, equality tests are not order-sensitive. 
""" - _d: Dict[T, None] + _d: dict[T, None] __slots__ = ("_d",) @@ -544,7 +542,7 @@ def update(self, values: Iterable[T]) -> None: self._d[v] = None def __repr__(self) -> str: - return "{}({!r})".format(type(self).__name__, list(self)) + return f"{type(self).__name__}({list(self)!r})" class NdimSizeLenMixin: @@ -585,14 +583,14 @@ def dtype(self: Any) -> np.dtype: return self.array.dtype @property - def shape(self: Any) -> Tuple[int]: + def shape(self: Any) -> tuple[int]: return self.array.shape def __getitem__(self: Any, key): return self.array[key] def __repr__(self: Any) -> str: - return "{}(array={!r})".format(type(self).__name__, self.array) + return f"{type(self).__name__}(array={self.array!r})" class ReprObject: @@ -659,7 +657,7 @@ def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: return magic_number -def try_read_magic_number_from_path(pathlike, count=8) -> Optional[bytes]: +def try_read_magic_number_from_path(pathlike, count=8) -> bytes | None: if isinstance(pathlike, str) or hasattr(pathlike, "__fspath__"): path = os.fspath(pathlike) try: @@ -670,9 +668,7 @@ def try_read_magic_number_from_path(pathlike, count=8) -> Optional[bytes]: return None -def try_read_magic_number_from_file_or_path( - filename_or_obj, count=8 -) -> Optional[bytes]: +def try_read_magic_number_from_file_or_path(filename_or_obj, count=8) -> bytes | None: magic_number = try_read_magic_number_from_path(filename_or_obj, count) if magic_number is None: try: @@ -706,7 +702,7 @@ def hashable(v: Any) -> bool: return True -def decode_numpy_dict_values(attrs: Mapping[K, V]) -> Dict[K, V]: +def decode_numpy_dict_values(attrs: Mapping[K, V]) -> dict[K, V]: """Convert attribute values from numpy objects to native Python objects, for use in to_dict """ @@ -815,7 +811,7 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: def drop_dims_from_indexers( indexers: Mapping[Any, Any], - dims: Union[list, Mapping[Any, int]], + dims: list | Mapping[Any, int], missing_dims: str, ) -> Mapping[Hashable, Any]: """Depending on the setting of missing_dims, drop any dimensions from indexers that diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 58aeceed3b1..c8d46d20d46 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -6,21 +6,11 @@ import warnings from collections import defaultdict from datetime import timedelta -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Hashable, - List, - Mapping, - Optional, - Sequence, - Tuple, - Union, -) +from typing import TYPE_CHECKING, Any, Hashable, Mapping, Sequence import numpy as np import pandas as pd +from packaging.version import Version import xarray as xr # only for Dataset and DataArray @@ -35,6 +25,7 @@ VectorizedIndexer, as_indexable, ) +from .npcompat import QUANTILE_METHODS, ArrayLike from .options import OPTIONS, _get_keep_attrs from .pycompat import ( DuckArrayModule, @@ -80,7 +71,7 @@ class MissingDimensionsError(ValueError): # TODO: move this to an xarray.exceptions module? -def as_variable(obj, name=None) -> Union[Variable, IndexVariable]: +def as_variable(obj, name=None) -> Variable | IndexVariable: """Convert an object into a Variable. 
Parameters @@ -136,7 +127,7 @@ def as_variable(obj, name=None) -> Union[Variable, IndexVariable]: elif isinstance(obj, (pd.Index, IndexVariable)) and obj.name is not None: obj = Variable(obj.name, obj) elif isinstance(obj, (set, dict)): - raise TypeError("variable {!r} has invalid type {!r}".format(name, type(obj))) + raise TypeError(f"variable {name!r} has invalid type {type(obj)!r}") elif name is not None: data = as_compatible_data(obj) if data.ndim != 1: @@ -709,7 +700,7 @@ def _broadcast_indexes_outer(self, key): return dims, OuterIndexer(tuple(new_key)), None def _nonzero(self): - """Equivalent numpy's nonzero but returns a tuple of Varibles.""" + """Equivalent numpy's nonzero but returns a tuple of Variables.""" # TODO we should replace dask's native nonzero # after https://github.com/dask/dask/issues/1076 is implemented. nonzeros = np.nonzero(self.data) @@ -865,7 +856,7 @@ def __setitem__(self, key, value): indexable[index_tuple] = value @property - def attrs(self) -> Dict[Hashable, Any]: + def attrs(self) -> dict[Hashable, Any]: """Dictionary of local attributes on this variable.""" if self._attrs is None: self._attrs = {} @@ -999,7 +990,7 @@ def __deepcopy__(self, memo=None): __hash__ = None # type: ignore[assignment] @property - def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: + def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ Tuple of block lengths for this dataarray's data, in order of dimensions, or None if the underlying data is not a dask array. @@ -1013,7 +1004,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: return getattr(self._data, "chunks", None) @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this variable's data, or None if the underlying data is not a dask array. @@ -1282,7 +1273,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): def _pad_options_dim_to_index( self, - pad_option: Mapping[Any, Union[int, Tuple[int, int]]], + pad_option: Mapping[Any, int | tuple[int, int]], fill_with_shape=False, ): if fill_with_shape: @@ -1294,14 +1285,16 @@ def _pad_options_dim_to_index( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] | None = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - reflect_type: str = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]]) + | None = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, + reflect_type: str | None = None, **pad_width_kwargs: Any, ): """ @@ -1438,7 +1431,7 @@ def transpose( self, *dims, missing_dims: str = "raise", - ) -> "Variable": + ) -> Variable: """Return a new Variable object with transposed dimensions. 
Parameters @@ -1483,7 +1476,7 @@ def transpose( return self._replace(dims=dims, data=data) @property - def T(self) -> "Variable": + def T(self) -> Variable: return self.transpose() def set_dims(self, dims, shape=None): @@ -1535,7 +1528,7 @@ def set_dims(self, dims, shape=None): ) return expanded_var.transpose(*dims) - def _stack_once(self, dims: List[Hashable], new_dim: Hashable): + def _stack_once(self, dims: list[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): raise ValueError(f"invalid existing dimensions: {dims}") @@ -1593,7 +1586,7 @@ def stack(self, dimensions=None, **dimensions_kwargs): def _unstack_once_full( self, dims: Mapping[Any, int], old_dim: Hashable - ) -> "Variable": + ) -> Variable: """ Unstacks the variable without needing an index. @@ -1634,7 +1627,7 @@ def _unstack_once( dim: Hashable, fill_value=dtypes.NA, sparse: bool = False, - ) -> "Variable": + ) -> Variable: """ Unstacks this variable given an index to unstack and the name of the dimension to which the index refers. @@ -1980,8 +1973,14 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): return self.broadcast_equals(other, equiv=equiv) def quantile( - self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True - ): + self, + q: ArrayLike, + dim: str | Sequence[Hashable] | None = None, + method: QUANTILE_METHODS = "linear", + keep_attrs: bool = None, + skipna: bool = None, + interpolation: QUANTILE_METHODS = None, + ) -> Variable: """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1993,22 +1992,43 @@ def quantile( inclusive. dim : str or sequence of str, optional Dimension(s) over which to apply quantile. - interpolation : {"linear", "lower", "higher", "midpoint", "nearest"}, default: "linear" - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + method : str, default: "linear" + This optional parameter specifies the interpolation method to use when the + desired quantile lies between two data points. The options sorted by their R + type as summarized in the H&F paper [1]_ are: + + 1. "inverted_cdf" (*) + 2. "averaged_inverted_cdf" (*) + 3. "closest_observation" (*) + 4. "interpolated_inverted_cdf" (*) + 5. "hazen" (*) + 6. "weibull" (*) + 7. "linear" (default) + 8. "median_unbiased" (*) + 9. "normal_unbiased" (*) + + The first three methods are discontiuous. The following discontinuous + variations of the default "linear" (7.) option are also available: + + * "lower" + * "higher" + * "midpoint" + * "nearest" + + See :py:func:`numpy.quantile` or [1]_ for details. Methods marked with + an asterix require numpy version 1.22 or newer. The "method" argument was + previously called "interpolation", renamed in accordance with numpy + version 1.22.0. + keep_attrs : bool, optional If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -2023,11 +2043,31 @@ def quantile( -------- numpy.nanquantile, pandas.Series.quantile, Dataset.quantile DataArray.quantile + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 """ from .computation import apply_ufunc - _quantile_func = np.nanquantile if skipna else np.quantile + if interpolation is not None: + warnings.warn( + "The `interpolation` argument to quantile was renamed to `method`.", + FutureWarning, + ) + + if method != "linear": + raise TypeError("Cannot pass interpolation and method keywords!") + + method = interpolation + + if skipna or (skipna is None and self.dtype.kind in "cfO"): + _quantile_func = np.nanquantile + else: + _quantile_func = np.quantile if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -2046,6 +2086,12 @@ def _wrapper(npa, **kwargs): return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) + + if Version(np.__version__) >= Version("1.22.0"): + kwargs = {"q": q, "axis": axis, "method": method} + else: + kwargs = {"q": q, "axis": axis, "interpolation": method} + result = apply_ufunc( _wrapper, self, @@ -2055,7 +2101,7 @@ def _wrapper(npa, **kwargs): output_dtypes=[np.float64], dask_gufunc_kwargs=dict(output_sizes={"quantile": len(q)}), dask="parallelized", - kwargs={"q": q, "axis": axis, "interpolation": interpolation}, + kwargs=kwargs, ) # for backward compatibility @@ -2109,9 +2155,7 @@ def rank(self, dim, pct=False): "prior to calling this method." ) elif not isinstance(data, np.ndarray): - raise TypeError( - "rank is not implemented for {} objects.".format(type(data)) - ) + raise TypeError(f"rank is not implemented for {type(data)} objects.") axis = self.get_axis_num(dim) func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata @@ -2455,11 +2499,11 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): def _unravel_argminmax( self, argminmax: str, - dim: Union[Hashable, Sequence[Hashable], None], - axis: Union[int, None], - keep_attrs: Optional[bool], - skipna: Optional[bool], - ) -> Union["Variable", Dict[Hashable, "Variable"]]: + dim: Hashable | Sequence[Hashable] | None, + axis: int | None, + keep_attrs: bool | None, + skipna: bool | None, + ) -> Variable | dict[Hashable, Variable]: """Apply argmin or argmax over one or more dimensions, returning the result as a dict of DataArray that can be passed directly to isel. """ @@ -2524,11 +2568,11 @@ def _unravel_argminmax( def argmin( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["Variable", Dict[Hashable, "Variable"]]: + ) -> Variable | dict[Hashable, Variable]: """Index or indices of the minimum of the Variable over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of Variables, which can be passed directly to isel(). 
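# Hedged sketch (the helper name is hypothetical) of the compatibility logic
# used above: warn on the old ``interpolation`` keyword and forward the value
# to numpy under whichever keyword the installed numpy understands
# (``method`` from 1.22 onwards, ``interpolation`` before that).
import warnings

import numpy as np
from packaging.version import Version


def _compat_nanquantile(a, q, method="linear", interpolation=None):
    if interpolation is not None:
        warnings.warn(
            "The `interpolation` argument to quantile was renamed to `method`.",
            FutureWarning,
        )
        if method != "linear":
            raise TypeError("Cannot pass interpolation and method keywords!")
        method = interpolation
    if Version(np.__version__) >= Version("1.22.0"):
        return np.nanquantile(a, q, method=method)
    return np.nanquantile(a, q, interpolation=method)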
If a single str is passed to 'dim' then @@ -2569,11 +2613,11 @@ def argmin( def argmax( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["Variable", Dict[Hashable, "Variable"]]: + ) -> Variable | dict[Hashable, Variable]: """Index or indices of the maximum of the Variable over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of Variables, which can be passed directly to isel(). If a single str is passed to 'dim' then @@ -2801,7 +2845,7 @@ def to_index(self): # set default names for multi-index unnamed levels so that # we can safely rename dimension / coordinate later valid_level_names = [ - name or "{}_level_{}".format(self.dims[0], i) + name or f"{self.dims[0]}_level_{i}" for i, name in enumerate(index.names) ] index = index.set_names(valid_level_names) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 0676d351b6f..83ce36bcb35 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,4 +1,6 @@ -from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Optional, Union, cast +from __future__ import annotations + +from typing import TYPE_CHECKING, Generic, Hashable, Iterable, cast import numpy as np @@ -74,7 +76,7 @@ class Weighted(Generic[T_Xarray]): __slots__ = ("obj", "weights") - def __init__(self, obj: T_Xarray, weights: "DataArray"): + def __init__(self, obj: T_Xarray, weights: DataArray): """ Create a Weighted object @@ -118,9 +120,9 @@ def _weight_check(w): _weight_check(weights.data) self.obj: T_Xarray = obj - self.weights: "DataArray" = weights + self.weights: DataArray = weights - def _check_dim(self, dim: Optional[Union[Hashable, Iterable[Hashable]]]): + def _check_dim(self, dim: Hashable | Iterable[Hashable] | None): """raise an error if any dimension is missing""" if isinstance(dim, str) or not isinstance(dim, Iterable): @@ -135,11 +137,11 @@ def _check_dim(self, dim: Optional[Union[Hashable, Iterable[Hashable]]]): @staticmethod def _reduce( - da: "DataArray", - weights: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + weights: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """reduce using dot; equivalent to (da * weights).sum(dim, skipna) for internal use only @@ -158,8 +160,8 @@ def _reduce( return dot(da, weights, dims=dim) def _sum_of_weights( - self, da: "DataArray", dim: Optional[Union[Hashable, Iterable[Hashable]]] = None - ) -> "DataArray": + self, da: DataArray, dim: Hashable | Iterable[Hashable] | None = None + ) -> DataArray: """Calculate the sum of weights, accounting for missing values""" # we need to mask data values that are nan; else the weights are wrong @@ -181,32 +183,32 @@ def _sum_of_weights( def _sum_of_squares( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``sum_of_squares`` along some dimension(s).""" demeaned = da - da.weighted(self.weights).mean(dim=dim) - return self._reduce((demeaned ** 2), self.weights, dim=dim, skipna=skipna) + return self._reduce((demeaned**2), self.weights, dim=dim, skipna=skipna) def _weighted_sum( self, - da: "DataArray", - 
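# Usage sketch (example data assumed) for the weighted reductions whose
# annotations are modernised in this file; as the _reduce docstring notes,
# they are computed via xr.dot rather than a literal (da * weights).sum():
import numpy as np
import xarray as xr

da = xr.DataArray([1.0, 2.0, np.nan], dims="x")
weights = xr.DataArray([0.25, 0.5, 0.25], dims="x")
wmean = da.weighted(weights).mean(dim="x", skipna=True)
# the NaN is masked in both data and weights, so wmean == (0.25 * 1 + 0.5 * 2) / 0.75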
dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``sum`` along some dimension(s).""" return self._reduce(da, self.weights, dim=dim, skipna=skipna) def _weighted_mean( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``mean`` along some dimension(s).""" weighted_sum = self._weighted_sum(da, dim=dim, skipna=skipna) @@ -217,10 +219,10 @@ def _weighted_mean( def _weighted_var( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``var`` along some dimension(s).""" sum_of_squares = self._sum_of_squares(da, dim=dim, skipna=skipna) @@ -231,10 +233,10 @@ def _weighted_var( def _weighted_std( self, - da: "DataArray", - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - ) -> "DataArray": + da: DataArray, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + ) -> DataArray: """Reduce a DataArray by a weighted ``std`` along some dimension(s).""" return cast("DataArray", np.sqrt(self._weighted_var(da, dim, skipna))) @@ -245,8 +247,8 @@ def _implementation(self, func, dim, **kwargs): def sum_of_weights( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -255,9 +257,9 @@ def sum_of_weights( def sum_of_squares( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -266,9 +268,9 @@ def sum_of_squares( def sum( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -277,9 +279,9 @@ def sum( def mean( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -288,9 +290,9 @@ def mean( def var( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -299,9 +301,9 @@ def var( def std( self, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, - skipna: Optional[bool] = None, - keep_attrs: Optional[bool] = 
None, + dim: Hashable | Iterable[Hashable] | None = None, + skipna: bool | None = None, + keep_attrs: bool | None = None, ) -> T_Xarray: return self._implementation( @@ -317,7 +319,7 @@ def __repr__(self): class DataArrayWeighted(Weighted["DataArray"]): - def _implementation(self, func, dim, **kwargs) -> "DataArray": + def _implementation(self, func, dim, **kwargs) -> DataArray: self._check_dim(dim) @@ -327,7 +329,7 @@ def _implementation(self, func, dim, **kwargs) -> "DataArray": class DatasetWeighted(Weighted["Dataset"]): - def _implementation(self, func, dim, **kwargs) -> "Dataset": + def _implementation(self, func, dim, **kwargs) -> Dataset: self._check_dim(dim) diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py index c1aedd570bc..527ae121dcf 100644 --- a/xarray/plot/dataset_plot.py +++ b/xarray/plot/dataset_plot.py @@ -591,9 +591,9 @@ def streamplot(ds, x, y, ax, u, v, **kwargs): if len(ds[y].dims) == 1: ydim = ds[y].dims[0] if xdim is not None and ydim is None: - ydim = set(ds[y].dims) - set([xdim]) + ydim = set(ds[y].dims) - {xdim} if ydim is not None and xdim is None: - xdim = set(ds[x].dims) - set([ydim]) + xdim = set(ds[x].dims) - {ydim} x, y, u, v = broadcast(ds[x], ds[y], ds[u], ds[v]) diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 3b2a133b3e5..f09d1eb1853 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -1036,7 +1036,8 @@ def _get_color_and_size(value): if label_values_are_numeric: label_values_min = label_values.min() label_values_max = label_values.max() - fmt.set_bounds(label_values_min, label_values_max) + fmt.axis.set_view_interval(label_values_min, label_values_max) + fmt.axis.set_data_interval(label_values_min, label_values_max) if num is not None: # Labels are numerical but larger than the target diff --git a/xarray/testing.py b/xarray/testing.py index 40ca12852b9..4369b828daf 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -82,7 +82,7 @@ def assert_equal(a, b): elif isinstance(a, Dataset): assert a.equals(b), formatting.diff_dataset_repr(a, b, "equals") else: - raise TypeError("{} not supported by assertion comparison".format(type(a))) + raise TypeError(f"{type(a)} not supported by assertion comparison") @ensure_warnings @@ -113,7 +113,7 @@ def assert_identical(a, b): elif isinstance(a, (Dataset, Variable)): assert a.identical(b), formatting.diff_dataset_repr(a, b, "identical") else: - raise TypeError("{} not supported by assertion comparison".format(type(a))) + raise TypeError(f"{type(a)} not supported by assertion comparison") @ensure_warnings @@ -170,7 +170,7 @@ def compat_variable(a, b): ) assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv) else: - raise TypeError("{} not supported by assertion comparison".format(type(a))) + raise TypeError(f"{type(a)} not supported by assertion comparison") def _format_message(x, y, err_msg, verbose): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 20dfdaf5076..00fec07f793 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -1,7 +1,7 @@ import importlib import platform import warnings -from contextlib import contextmanager +from contextlib import contextmanager, nullcontext from unittest import mock # noqa: F401 import numpy as np @@ -113,15 +113,10 @@ def __call__(self, dsk, keys, **kwargs): return dask.get(dsk, keys, **kwargs) -@contextmanager -def dummy_context(): - yield None - - def raise_if_dask_computes(max_computes=0): # return a dummy context manager so that this can be used for non-dask objects if not 
has_dask: - return dummy_context() + return nullcontext() scheduler = CountingScheduler(max_computes) return dask.config.set(scheduler=scheduler) @@ -170,6 +165,14 @@ def source_ndarray(array): return base +@contextmanager +def assert_no_warnings(): + + with warnings.catch_warnings(record=True) as record: + yield record + assert len(record) == 0, "got unexpected warning(s)" + + # Internal versions of xarray's test functions that validate additional # invariants diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index e9278f1e918..92a8b8a0e39 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -97,7 +97,7 @@ def test_field_access(self, field) -> None: def test_isocalendar(self, field, pandas_field) -> None: # pandas isocalendar has dtypy UInt32Dtype, convert to Int64 - expected = pd.Int64Index(getattr(self.times.isocalendar(), pandas_field)) + expected = pd.Index(getattr(self.times.isocalendar(), pandas_field).astype(int)) expected = xr.DataArray( expected, name=field, coords=[self.times], dims=["time"] ) @@ -402,8 +402,7 @@ def times_3d(times): "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) def test_field_access(data, field) -> None: - if field == "dayofyear" or field == "dayofweek": - pytest.importorskip("cftime", minversion="1.0.2.1") + result = getattr(data.time.dt, field) expected = xr.DataArray( getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), @@ -436,7 +435,7 @@ def test_calendar_dask() -> None: # 3D lazy dask - np data = xr.DataArray( - da.random.random_integers(1, 1000000, size=(4, 5, 6)).astype(" None: def test_dask_field_access_1d(data, field) -> None: import dask.array as da - if field == "dayofyear" or field == "dayofweek": - pytest.importorskip("cftime", minversion="1.0.2.1") expected = xr.DataArray( getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), name=field, @@ -526,8 +523,6 @@ def test_dask_field_access_1d(data, field) -> None: def test_dask_field_access(times_3d, data, field) -> None: import dask.array as da - if field == "dayofyear" or field == "dayofweek": - pytest.importorskip("cftime", minversion="1.0.2.1") expected = xr.DataArray( getattr( xr.coding.cftimeindex.CFTimeIndex(times_3d.values.ravel()), field diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bffac52e979..e0bc0b10437 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,6 +54,7 @@ assert_array_equal, assert_equal, assert_identical, + assert_no_warnings, has_dask, has_h5netcdf_0_12, has_netCDF4, @@ -437,6 +438,7 @@ def test_dataset_caching(self): actual.foo.values # no caching assert not actual.foo.variable._in_memory + @pytest.mark.filterwarnings("ignore:deallocating CachingFileManager") def test_roundtrip_None_variable(self): expected = Dataset({None: (("x", "y"), [[0, 1], [2, 3]])}) with self.roundtrip(expected) as actual: @@ -517,7 +519,7 @@ def test_roundtrip_cftime_datetime_data(self): expected_calendar = times[0].calendar with warnings.catch_warnings(): - if expected_calendar in {"proleptic_gregorian", "gregorian"}: + if expected_calendar in {"proleptic_gregorian", "standard"}: warnings.filterwarnings("ignore", "Unable to decode time axis") with self.roundtrip(expected, save_kwargs=kwargs) as actual: @@ -743,9 +745,7 @@ def find_and_validate_array(obj): elif isinstance(obj.array, pd.Index): assert isinstance(obj, indexing.PandasIndexingAdapter) else: - raise TypeError( - "{} is wrapped by 
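# Usage sketch: the assert_no_warnings() helper added above replaces the
# deprecated pytest.warns(None) idiom throughout the test suite. A
# self-contained equivalent is duplicated here only for illustration.
import warnings
from contextlib import contextmanager


@contextmanager
def assert_no_warnings():
    with warnings.catch_warnings(record=True) as record:
        yield record
    assert len(record) == 0, "got unexpected warning(s)"


def test_addition_is_silent():
    with assert_no_warnings():
        assert 1 + 1 == 2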
{}".format(type(obj.array), type(obj)) - ) + raise TypeError(f"{type(obj.array)} is wrapped by {type(obj)}") for k, v in ds.variables.items(): find_and_validate_array(v._data) @@ -1195,7 +1195,7 @@ def test_multiindex_not_implemented(self): @contextlib.contextmanager def create_tmp_file(suffix=".nc", allow_cleanup_failure=False): temp_dir = tempfile.mkdtemp() - path = os.path.join(temp_dir, "temp-{}{}".format(next(_counter), suffix)) + path = os.path.join(temp_dir, f"temp-{next(_counter)}{suffix}") try: yield path finally: @@ -1295,7 +1295,7 @@ def test_roundtrip_string_with_fill_value_vlen(self): # netCDF4-based backends don't support an explicit fillvalue # for variable length strings yet. # https://github.com/Unidata/netcdf4-python/issues/730 - # https://github.com/shoyer/h5netcdf/issues/37 + # https://github.com/h5netcdf/h5netcdf/issues/37 original = Dataset({"x": ("t", values, {}, {"_FillValue": "XXX"})}) with pytest.raises(NotImplementedError): with self.roundtrip(original) as actual: @@ -1446,7 +1446,7 @@ def test_encoding_chunksizes_unlimited(self): "complevel": 0, "fletcher32": False, "contiguous": False, - "chunksizes": (2 ** 20,), + "chunksizes": (2**20,), "original_shape": (3,), } with self.roundtrip(ds) as actual: @@ -1815,12 +1815,11 @@ def test_warning_on_bad_chunks(self): good_chunks = ({"dim2": 3}, {"dim3": (6, 4)}, {}) for chunks in good_chunks: kwargs = {"chunks": chunks} - with pytest.warns(None) as record: + with assert_no_warnings(): with self.roundtrip(original, open_kwargs=kwargs) as actual: for k, v in actual.variables.items(): # only index variables should be in memory assert v._in_memory == (k in actual.dims) - assert len(record) == 0 @requires_dask def test_deprecate_auto_chunk(self): @@ -1896,20 +1895,24 @@ def test_chunk_encoding_with_dask(self): # don't actually check equality because the data could be corrupted pass - badenc.var1.encoding["chunks"] = (2,) - with pytest.raises(NotImplementedError, match=r"Specified Zarr chunk encoding"): - with self.roundtrip(badenc) as actual: - pass + # if dask chunks (4) are an integer multiple of zarr chunks (2) it should not fail... + goodenc = ds.chunk({"x": 4}) + goodenc.var1.encoding["chunks"] = (2,) + with self.roundtrip(goodenc) as actual: + pass - badenc = badenc.chunk({"x": (3, 3, 6)}) - badenc.var1.encoding["chunks"] = (3,) - with pytest.raises( - NotImplementedError, match=r"incompatible with this encoding" - ): - with self.roundtrip(badenc) as actual: - pass + # if initial dask chunks are aligned, size of last dask chunk doesn't matter + goodenc = ds.chunk({"x": (3, 3, 6)}) + goodenc.var1.encoding["chunks"] = (3,) + with self.roundtrip(goodenc) as actual: + pass - # ... except if the last chunk is smaller than the first + goodenc = ds.chunk({"x": (3, 6, 3)}) + goodenc.var1.encoding["chunks"] = (3,) + with self.roundtrip(goodenc) as actual: + pass + + # ... 
also if the last chunk is irregular ds_chunk_irreg = ds.chunk({"x": (5, 5, 2)}) with self.roundtrip(ds_chunk_irreg) as actual: assert (5,) == actual["var1"].encoding["chunks"] @@ -1918,6 +1921,15 @@ def test_chunk_encoding_with_dask(self): with self.roundtrip(original) as actual: assert_identical(original, actual) + # but itermediate unaligned chunks are bad + badenc = ds.chunk({"x": (3, 5, 3, 1)}) + badenc.var1.encoding["chunks"] = (3,) + with pytest.raises( + NotImplementedError, match=r"would overlap multiple dask chunks" + ): + with self.roundtrip(badenc) as actual: + pass + # - encoding specified - # specify compatible encodings for chunk_enc in 4, (4,): @@ -1925,7 +1937,7 @@ def test_chunk_encoding_with_dask(self): with self.roundtrip(ds_chunk4) as actual: assert (4,) == actual["var1"].encoding["chunks"] - # TODO: remove this failure once syncronized overlapping writes are + # TODO: remove this failure once synchronized overlapping writes are # supported by xarray ds_chunk4["var1"].encoding.update({"chunks": 5}) with pytest.raises(NotImplementedError, match=r"named 'var1' would overlap"): @@ -2243,7 +2255,7 @@ def test_write_region_mode(self, mode): @requires_dask def test_write_preexisting_override_metadata(self): - """Metadata should be overriden if mode="a" but not in mode="r+".""" + """Metadata should be overridden if mode="a" but not in mode="r+".""" original = Dataset( {"u": (("x",), np.zeros(10), {"variable": "original"})}, attrs={"global": "original"}, @@ -2375,6 +2387,15 @@ def test_chunk_encoding_with_partial_dask_chunks(self): ) as ds1: assert_equal(ds1, original) + @requires_dask + def test_chunk_encoding_with_larger_dask_chunks(self): + original = xr.Dataset({"a": ("x", [1, 2, 3, 4])}).chunk({"x": 2}) + + with self.roundtrip( + original, save_kwargs={"encoding": {"a": {"chunks": [1]}}} + ) as ds1: + assert_equal(ds1, original) + @requires_cftime def test_open_zarr_use_cftime(self): ds = create_test_data() @@ -2385,6 +2406,20 @@ def test_open_zarr_use_cftime(self): ds_b = xr.open_zarr(store_target, use_cftime=True) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) + def test_write_read_select_write(self): + # Test for https://github.com/pydata/xarray/issues/4084 + ds = create_test_data() + + # NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug. + with self.create_zarr_target() as initial_store: + ds.to_zarr(initial_store, mode="w") + ds1 = xr.open_zarr(initial_store) + + # Combination of where+squeeze triggers error on write. + ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3") + with self.create_zarr_target() as final_store: + ds_sel.to_zarr(final_store, mode="w") + @requires_zarr class TestZarrDictStore(ZarrBase): @@ -2932,7 +2967,7 @@ def test_open_fileobj(self): with pytest.raises(TypeError, match="not a valid NetCDF 3"): open_dataset(f, engine="scipy") - # TOOD: this additional open is required since scipy seems to close the file + # TODO: this additional open is required since scipy seems to close the file # when it fails on the TypeError (though didn't when we used # `raises_regex`?). 
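# Hedged sketch of the alignment rule the rewritten tests encode (requires
# dask and zarr; the store paths are illustrative): dask chunks that are an
# integer multiple of the "chunks" encoding are accepted, while an interior
# dask chunk that straddles a zarr chunk boundary raises NotImplementedError.
import xarray as xr

ds = xr.Dataset({"var1": ("x", list(range(12)))})

good = ds.chunk({"x": 4})
good.var1.encoding["chunks"] = (2,)
good.to_zarr("aligned.zarr", mode="w")  # fine: 4 is a multiple of 2

bad = ds.chunk({"x": (3, 5, 3, 1)})
bad.var1.encoding["chunks"] = (3,)
# bad.to_zarr("unaligned.zarr", mode="w")  # would raise NotImplementedError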
Ref https://github.com/pydata/xarray/pull/5191 with open(tmp_file, "rb") as f: @@ -4222,8 +4257,8 @@ def create_tmp_geotiff( transform = from_origin(*transform_args) if additional_attrs is None: additional_attrs = { - "descriptions": tuple("d{}".format(n + 1) for n in range(nz)), - "units": tuple("u{}".format(n + 1) for n in range(nz)), + "descriptions": tuple(f"d{n + 1}" for n in range(nz)), + "units": tuple(f"u{n + 1}" for n in range(nz)), } with rasterio.open( tmp_file, @@ -4691,11 +4726,14 @@ def test_rasterio_vrt(self): assert actual_shape == expected_shape assert expected_val.all() == actual_val.all() + @pytest.mark.filterwarnings( + "ignore:open_rasterio is Deprecated in favor of rioxarray." + ) def test_rasterio_vrt_with_transform_and_size(self): # Test open_rasterio() support of WarpedVRT with transform, width and # height (issue #2864) - # https://github.com/mapbox/rasterio/1768 + # https://github.com/rasterio/rasterio/1768 rasterio = pytest.importorskip("rasterio", minversion="1.0.28") from affine import Affine from rasterio.warp import calculate_default_transform @@ -4725,7 +4763,7 @@ def test_rasterio_vrt_with_transform_and_size(self): def test_rasterio_vrt_with_src_crs(self): # Test open_rasterio() support of WarpedVRT with specified src_crs - # https://github.com/mapbox/rasterio/1768 + # https://github.com/rasterio/rasterio/1768 rasterio = pytest.importorskip("rasterio", minversion="1.0.28") # create geotiff with no CRS and specify it manually @@ -4948,10 +4986,9 @@ def test_dataarray_to_netcdf_no_name_pathlib(self): @requires_scipy_or_netCDF4 def test_no_warning_from_dask_effective_get(): with create_tmp_file() as tmpfile: - with pytest.warns(None) as record: + with assert_no_warnings(): ds = Dataset() ds.to_netcdf(tmpfile) - assert len(record) == 0 @requires_scipy_or_netCDF4 @@ -4993,7 +5030,7 @@ def test_use_cftime_standard_calendar_default_in_range(calendar): with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: with open_dataset(tmp_file) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) @@ -5056,7 +5093,7 @@ def test_use_cftime_true(calendar, units_year): with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: with open_dataset(tmp_file, use_cftime=True) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) @@ -5085,7 +5122,7 @@ def test_use_cftime_false_standard_calendar_in_range(calendar): with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: with open_dataset(tmp_file, use_cftime=False) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) diff --git a/xarray/tests/test_calendar_ops.py b/xarray/tests/test_calendar_ops.py index 8d1ddcf4689..0f0948aafc5 100644 --- a/xarray/tests/test_calendar_ops.py +++ b/xarray/tests/test_calendar_ops.py @@ -161,7 +161,7 @@ def test_convert_calendar_errors(): with pytest.raises(ValueError, match="Argument `align_on` must be specified"): convert_calendar(src_nl, "360_day") - # Standard doesn't suuport year 0 + # Standard doesn't support year 0 with pytest.raises( ValueError, match="Source time coordinate contains dates with year 0" ): diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py 
index 061c1420aba..3879959675f 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -18,6 +18,7 @@ QuarterBegin, QuarterEnd, Second, + Tick, YearBegin, YearEnd, _days_in_month, @@ -54,11 +55,25 @@ def calendar(request): (YearEnd(), 1), (QuarterBegin(), 1), (QuarterEnd(), 1), + (Tick(), 1), + (Day(), 1), + (Hour(), 1), + (Minute(), 1), + (Second(), 1), + (Millisecond(), 1), + (Microsecond(), 1), (BaseCFTimeOffset(n=2), 2), (YearBegin(n=2), 2), (YearEnd(n=2), 2), (QuarterBegin(n=2), 2), (QuarterEnd(n=2), 2), + (Tick(n=2), 2), + (Day(n=2), 2), + (Hour(n=2), 2), + (Minute(n=2), 2), + (Second(n=2), 2), + (Millisecond(n=2), 2), + (Microsecond(n=2), 2), ], ids=_id_func, ) @@ -74,6 +89,15 @@ def test_cftime_offset_constructor_valid_n(offset, expected_n): (YearEnd, 1.5), (QuarterBegin, 1.5), (QuarterEnd, 1.5), + (MonthBegin, 1.5), + (MonthEnd, 1.5), + (Tick, 1.5), + (Day, 1.5), + (Hour, 1.5), + (Minute, 1.5), + (Second, 1.5), + (Millisecond, 1.5), + (Microsecond, 1.5), ], ids=_id_func, ) @@ -359,30 +383,64 @@ def test_eq(a, b): _MUL_TESTS = [ - (BaseCFTimeOffset(), BaseCFTimeOffset(n=3)), - (YearEnd(), YearEnd(n=3)), - (YearBegin(), YearBegin(n=3)), - (QuarterEnd(), QuarterEnd(n=3)), - (QuarterBegin(), QuarterBegin(n=3)), - (MonthEnd(), MonthEnd(n=3)), - (MonthBegin(), MonthBegin(n=3)), - (Day(), Day(n=3)), - (Hour(), Hour(n=3)), - (Minute(), Minute(n=3)), - (Second(), Second(n=3)), - (Millisecond(), Millisecond(n=3)), - (Microsecond(), Microsecond(n=3)), + (BaseCFTimeOffset(), 3, BaseCFTimeOffset(n=3)), + (YearEnd(), 3, YearEnd(n=3)), + (YearBegin(), 3, YearBegin(n=3)), + (QuarterEnd(), 3, QuarterEnd(n=3)), + (QuarterBegin(), 3, QuarterBegin(n=3)), + (MonthEnd(), 3, MonthEnd(n=3)), + (MonthBegin(), 3, MonthBegin(n=3)), + (Tick(), 3, Tick(n=3)), + (Day(), 3, Day(n=3)), + (Hour(), 3, Hour(n=3)), + (Minute(), 3, Minute(n=3)), + (Second(), 3, Second(n=3)), + (Millisecond(), 3, Millisecond(n=3)), + (Microsecond(), 3, Microsecond(n=3)), + (Day(), 0.5, Hour(n=12)), + (Hour(), 0.5, Minute(n=30)), + (Minute(), 0.5, Second(n=30)), + (Second(), 0.5, Millisecond(n=500)), + (Millisecond(), 0.5, Microsecond(n=500)), ] -@pytest.mark.parametrize(("offset", "expected"), _MUL_TESTS, ids=_id_func) -def test_mul(offset, expected): - assert offset * 3 == expected +@pytest.mark.parametrize(("offset", "multiple", "expected"), _MUL_TESTS, ids=_id_func) +def test_mul(offset, multiple, expected): + assert offset * multiple == expected -@pytest.mark.parametrize(("offset", "expected"), _MUL_TESTS, ids=_id_func) -def test_rmul(offset, expected): - assert 3 * offset == expected +@pytest.mark.parametrize(("offset", "multiple", "expected"), _MUL_TESTS, ids=_id_func) +def test_rmul(offset, multiple, expected): + assert multiple * offset == expected + + +def test_mul_float_multiple_next_higher_resolution(): + """Test more than one iteration through _next_higher_resolution is required.""" + assert 1e-6 * Second() == Microsecond() + assert 1e-6 / 60 * Minute() == Microsecond() + + +@pytest.mark.parametrize( + "offset", + [YearBegin(), YearEnd(), QuarterBegin(), QuarterEnd(), MonthBegin(), MonthEnd()], + ids=_id_func, +) +def test_nonTick_offset_multiplied_float_error(offset): + """Test that the appropriate error is raised if a non-Tick offset is + multiplied by a float.""" + with pytest.raises(TypeError, match="unsupported operand type"): + offset * 0.5 + + +def test_Microsecond_multiplied_float_error(): + """Test that the appropriate error is raised if a Tick offset is multiplied + by a 
float which causes it not to be representable by a + microsecond-precision timedelta.""" + with pytest.raises( + ValueError, match="Could not convert to integer offset at any resolution" + ): + Microsecond() * 0.5 @pytest.mark.parametrize( @@ -1203,7 +1261,6 @@ def test_calendar_year_length(calendar, start, end, expected_number_of_days): @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofweek_after_cftime_range(freq): - pytest.importorskip("cftime", minversion="1.0.2.1") result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek np.testing.assert_array_equal(result, expected) @@ -1211,7 +1268,6 @@ def test_dayofweek_after_cftime_range(freq): @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofyear_after_cftime_range(freq): - pytest.importorskip("cftime", minversion="1.0.2.1") result = cftime_range("2000-02-01", periods=3, freq=freq).dayofyear expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofyear np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 94f0cf4c2a5..2c6a0796c5f 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1,3 +1,4 @@ +import pickle from datetime import timedelta from textwrap import dedent @@ -754,7 +755,7 @@ def test_cftimeindex_add(index): @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_cftimeindex_add_timedeltaindex(calendar): +def test_cftimeindex_add_timedeltaindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) deltas = pd.TimedeltaIndex([timedelta(days=2) for _ in range(5)]) result = a + deltas @@ -763,6 +764,44 @@ def test_cftimeindex_add_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize("n", [2.0, 1.5]) +@pytest.mark.parametrize( + "freq,units", + [ + ("D", "D"), + ("H", "H"), + ("T", "min"), + ("S", "S"), + ("L", "ms"), + ], +) +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_shift_float(n, freq, units, calendar) -> None: + a = xr.cftime_range("2000", periods=3, calendar=calendar, freq="D") + result = a + pd.Timedelta(n, units) + expected = a.shift(n, freq) + assert result.equals(expected) + assert isinstance(result, CFTimeIndex) + + +@requires_cftime +def test_cftimeindex_shift_float_us() -> None: + a = xr.cftime_range("2000", periods=3, freq="D") + with pytest.raises( + ValueError, match="Could not convert to integer offset at any resolution" + ): + a.shift(2.5, "us") + + +@requires_cftime +@pytest.mark.parametrize("freq", ["AS", "A", "YS", "Y", "QS", "Q", "MS", "M"]) +def test_cftimeindex_shift_float_fails_for_non_tick_freqs(freq) -> None: + a = xr.cftime_range("2000", periods=3, freq="D") + with pytest.raises(TypeError, match="unsupported operand type"): + a.shift(2.5, freq) + + @requires_cftime def test_cftimeindex_radd(index): date_type = index.date_type @@ -780,7 +819,7 @@ def test_cftimeindex_radd(index): @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_timedeltaindex_add_cftimeindex(calendar): +def test_timedeltaindex_add_cftimeindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) deltas = pd.TimedeltaIndex([timedelta(days=2) for _ in range(5)]) result = deltas + a @@ -828,7 +867,7 @@ def test_cftimeindex_sub_timedelta_array(index, other): @requires_cftime @pytest.mark.parametrize("calendar", 
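# Hedged sketch (assumes the behaviour added by this patch and an installed
# cftime): fractional multiples of Tick offsets resolve to a finer unit, and
# CFTimeIndex.shift() now accepts floats for tick frequencies.
import pandas as pd
import xarray as xr
from xarray.coding.cftime_offsets import Day, Hour

assert Day() * 0.5 == Hour(n=12)

idx = xr.cftime_range("2000", periods=3, freq="D")
assert idx.shift(2.5, "D").equals(idx + pd.Timedelta(2.5, "D"))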
_CFTIME_CALENDARS) -def test_cftimeindex_sub_cftimeindex(calendar): +def test_cftimeindex_sub_cftimeindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) b = a.shift(2, "D") result = b - a @@ -867,7 +906,7 @@ def test_distant_cftime_datetime_sub_cftimeindex(calendar): @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) -def test_cftimeindex_sub_timedeltaindex(calendar): +def test_cftimeindex_sub_timedeltaindex(calendar) -> None: a = xr.cftime_range("2000", periods=5, calendar=calendar) deltas = pd.TimedeltaIndex([timedelta(days=2) for _ in range(5)]) result = a - deltas @@ -903,7 +942,7 @@ def test_cftimeindex_rsub(index): @requires_cftime @pytest.mark.parametrize("freq", ["D", timedelta(days=1)]) -def test_cftimeindex_shift(index, freq): +def test_cftimeindex_shift(index, freq) -> None: date_type = index.date_type expected_dates = [ date_type(1, 1, 3), @@ -918,14 +957,14 @@ def test_cftimeindex_shift(index, freq): @requires_cftime -def test_cftimeindex_shift_invalid_n(): +def test_cftimeindex_shift_invalid_n() -> None: index = xr.cftime_range("2000", periods=3) with pytest.raises(TypeError): index.shift("a", "D") @requires_cftime -def test_cftimeindex_shift_invalid_freq(): +def test_cftimeindex_shift_invalid_freq() -> None: index = xr.cftime_range("2000", periods=3) with pytest.raises(TypeError): index.shift(1, 1) @@ -1289,3 +1328,12 @@ def test_infer_freq(freq, calendar): indx = xr.cftime_range("2000-01-01", periods=3, freq=freq, calendar=calendar) out = xr.infer_freq(indx) assert out == freq + + +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_pickle_cftimeindex(calendar): + + idx = xr.cftime_range("2000-01-01", periods=3, freq="D", calendar=calendar) + idx_pkl = pickle.loads(pickle.dumps(idx)) + assert (idx == idx_pkl).all() diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index ef2eeac0e0b..7d6613421d5 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -58,7 +58,7 @@ def test_coarsen_coords(ds, dask) -> None: da = xr.DataArray( np.linspace(0, 365, num=364), dims="time", - coords={"time": pd.date_range("15/12/1999", periods=364)}, + coords={"time": pd.date_range("1999-12-15", periods=364)}, ) actual = da.coarsen(time=2).mean() @@ -158,7 +158,7 @@ def test_coarsen_keep_attrs(funcname, argument) -> None: @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median")) def test_coarsen_reduce(ds, window, name) -> None: - # Use boundary="trim" to accomodate all window sizes used in tests + # Use boundary="trim" to accommodate all window sizes used in tests coarsen_obj = ds.coarsen(time=window, boundary="trim") # add nan prefix to numpy methods to get similar behavior as bottleneck @@ -241,7 +241,7 @@ def test_coarsen_da_reduce(da, window, name) -> None: if da.isnull().sum() > 1 and window == 1: pytest.skip("These parameters lead to all-NaN slices") - # Use boundary="trim" to accomodate all window sizes used in tests + # Use boundary="trim" to accommodate all window sizes used in tests coarsen_obj = da.coarsen(time=window, boundary="trim") # add nan prefix to numpy methods to get similar # behavior as bottleneck diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 2e19ddb3a75..92d27f22eb8 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -32,6 +32,7 @@ from . 
import ( arm_xfail, assert_array_equal, + assert_no_warnings, has_cftime, has_cftime_1_4_1, requires_cftime, @@ -905,10 +906,9 @@ def test_use_cftime_default_standard_calendar_in_range(calendar) -> None: units = "days since 2000-01-01" expected = pd.date_range("2000", periods=2) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar) np.testing.assert_array_equal(result, expected) - assert not record @requires_cftime @@ -942,10 +942,9 @@ def test_use_cftime_default_non_standard_calendar(calendar, units_year) -> None: numerical_dates, units, calendar, only_use_cftime_datetimes=True ) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar) np.testing.assert_array_equal(result, expected) - assert not record @requires_cftime @@ -960,10 +959,9 @@ def test_use_cftime_true(calendar, units_year) -> None: numerical_dates, units, calendar, only_use_cftime_datetimes=True ) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=True) np.testing.assert_array_equal(result, expected) - assert not record @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) @@ -972,10 +970,9 @@ def test_use_cftime_false_standard_calendar_in_range(calendar) -> None: units = "days since 2000-01-01" expected = pd.date_range("2000", periods=2) - with pytest.warns(None) as record: + with assert_no_warnings(): result = decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False) np.testing.assert_array_equal(result, expected) - assert not record @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index c9a10b7cc43..dac3c17b1f1 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -475,13 +475,10 @@ def test_unified_dim_sizes() -> None: "x": 1, "y": 2, } - assert ( - unified_dim_sizes( - [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], - exclude_dims={"z"}, - ) - == {"x": 1, "y": 2} - ) + assert unified_dim_sizes( + [xr.Variable(("x", "z"), [[1]]), xr.Variable(("y", "z"), [[1, 2], [3, 4]])], + exclude_dims={"z"}, + ) == {"x": 1, "y": 2} # duplicate dimensions with pytest.raises(ValueError): @@ -1557,6 +1554,7 @@ def test_covcorr_consistency(da_a, da_b, dim) -> None: @requires_dask @pytest.mark.parametrize("da_a, da_b", arrays_w_tuples()[1]) @pytest.mark.parametrize("dim", [None, "time", "x"]) +@pytest.mark.filterwarnings("ignore:invalid value encountered in .*divide") def test_corr_lazycorr_consistency(da_a, da_b, dim) -> None: da_al = da_a.chunk() da_bl = da_b.chunk() @@ -1922,6 +1920,15 @@ def test_where() -> None: assert_identical(expected, actual) +def test_where_attrs() -> None: + cond = xr.DataArray([True, False], dims="x", attrs={"attr": "cond"}) + x = xr.DataArray([1, 1], dims="x", attrs={"attr": "x"}) + y = xr.DataArray([0, 0], dims="x", attrs={"attr": "y"}) + actual = xr.where(cond, x, y, keep_attrs=True) + expected = xr.DataArray([1, 0], dims="x", attrs={"attr": "x"}) + assert_identical(expected, actual) + + @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize("use_datetime", [True, False]) def test_polyval(use_dask, use_datetime) -> None: @@ -1938,7 +1945,7 @@ def test_polyval(use_dask, use_datetime) -> None: xcoord = xr.DataArray(x, dims=("x",), name="x") da = xr.DataArray( - np.stack((1.0 + x + 2.0 * x ** 2, 
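# Usage sketch for the keep_attrs behaviour that the new test_where_attrs
# above covers: with keep_attrs=True the result takes its attrs from ``x``.
import xarray as xr

cond = xr.DataArray([True, False], dims="x")
x = xr.DataArray([1, 1], dims="x", attrs={"attr": "x"})
y = xr.DataArray([0, 0], dims="x")
out = xr.where(cond, x, y, keep_attrs=True)
assert out.attrs == {"attr": "x"}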
1.0 + 2.0 * x + 3.0 * x ** 2)), + np.stack((1.0 + x + 2.0 * x**2, 1.0 + 2.0 * x + 3.0 * x**2)), dims=("d", "x"), coords={"x": xcoord, "d": [0, 1]}, ) @@ -2031,7 +2038,7 @@ def test_polyval(use_dask, use_datetime) -> None: "cartesian", -1, ], - [ # Test filling inbetween with coords: + [ # Test filling in between with coords: xr.DataArray( [1, 2], dims=["cartesian"], diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index a8d06188844..8a37df62261 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -7,7 +7,6 @@ from xarray import DataArray, Dataset, Variable, concat from xarray.core import dtypes, merge -from xarray.core.concat import compat_options, concat_options from . import ( InaccessibleArray, diff --git a/xarray/tests/test_cupy.py b/xarray/tests/test_cupy.py index e8f35e12ac6..79a540cdb38 100644 --- a/xarray/tests/test_cupy.py +++ b/xarray/tests/test_cupy.py @@ -11,7 +11,7 @@ def toy_weather_data(): """Construct the example DataSet from the Toy weather data example. - http://xarray.pydata.org/en/stable/examples/weather-data.html + https://docs.xarray.dev/en/stable/examples/weather-data.html Here we construct the DataSet exactly as shown in the example and then convert the numpy arrays to cupy. diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 48432f319b2..42d8df57cb7 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -460,7 +460,7 @@ def test_concat_loads_variables(self): assert isinstance(out["c"].data, dask.array.Array) out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=[], coords=[]) - # variables are loaded once as we are validing that they're identical + # variables are loaded once as we are validating that they're identical assert kernel_call_count == 12 assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 72ccc80bd06..55c68b7ff6b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -33,6 +33,7 @@ assert_chunks_equal, assert_equal, assert_identical, + assert_no_warnings, has_dask, raise_if_dask_computes, requires_bottleneck, @@ -173,7 +174,7 @@ def test_get_index_size_zero(self): def test_struct_array_dims(self): """ - This test checks subraction of two DataArrays for the case + This test checks subtraction of two DataArrays for the case when dimension is a structured array. 
""" # GH837, GH861 @@ -196,7 +197,7 @@ def test_struct_array_dims(self): assert_identical(actual, expected) - # checking array subraction when dims are not the same + # checking array subtraction when dims are not the same p_data_alt = np.array( [("Abe", 180), ("Stacy", 151), ("Dick", 200)], dtype=[("name", "|S256"), ("height", object)], @@ -212,7 +213,7 @@ def test_struct_array_dims(self): assert_identical(actual, expected) - # checking array subraction when dims are not the same and one + # checking array subtraction when dims are not the same and one # is np.nan p_data_nan = np.array( [("Abe", 180), ("Stacy", np.nan), ("Dick", 200)], @@ -1528,13 +1529,17 @@ def test_reindex_regressions(self): re_dtype = x.reindex_like(y, method="pad").dtype assert x.dtype == re_dtype - def test_reindex_method(self): + def test_reindex_method(self) -> None: x = DataArray([10, 20], dims="y", coords={"y": [0, 1]}) y = [-0.1, 0.5, 1.1] actual = x.reindex(y=y, method="backfill", tolerance=0.2) expected = DataArray([10, np.nan, np.nan], coords=[("y", y)]) assert_identical(expected, actual) + actual = x.reindex(y=y, method="backfill", tolerance=[0.1, 0.1, 0.01]) + expected = DataArray([10, np.nan, np.nan], coords=[("y", y)]) + assert_identical(expected, actual) + alt = Dataset({"y": y}) actual = x.reindex_like(alt, method="backfill") expected = DataArray([10, 20, np.nan], coords=[("y", y)]) @@ -2157,18 +2162,11 @@ def test_stack_unstack(self): # test GH3000 a = orig[:0, :1].stack(dim=("x", "y")).dim.to_index() - if pd.__version__ < "0.24.0": - b = pd.MultiIndex( - levels=[pd.Int64Index([]), pd.Int64Index([0])], - labels=[[], []], - names=["x", "y"], - ) - else: - b = pd.MultiIndex( - levels=[pd.Int64Index([]), pd.Int64Index([0])], - codes=[[], []], - names=["x", "y"], - ) + b = pd.MultiIndex( + levels=[pd.Index([], np.int64), pd.Index([0], np.int64)], + codes=[[], []], + names=["x", "y"], + ) pd.testing.assert_index_equal(a, b) actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) @@ -2518,15 +2516,19 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) - @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim, skipna): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) - _percentile_func = np.nanpercentile if skipna else np.percentile - expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) + def test_quantile(self, q, axis, dim, skipna) -> None: + + va = self.va.copy(deep=True) + va[0, 0] = np.NaN + + actual = DataArray(va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile + expected = _percentile_func(va.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims @@ -2535,6 +2537,38 @@ def test_quantile(self, q, axis, dim, skipna): assert actual.attrs == self.attrs + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_method(self, method) -> None: + q = [0.25, 0.5, 0.75] + actual = DataArray(self.va).quantile(q, method=method) + + if Version(np.__version__) >= Version("1.22.0"): + expected = np.nanquantile(self.dv.values, np.array(q), method=method) # type: 
ignore[call-arg] + else: + expected = np.nanquantile(self.dv.values, np.array(q), interpolation=method) # type: ignore[call-arg] + + np.testing.assert_allclose(actual.values, expected) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecated(self, method) -> None: + + da = DataArray(self.va) + q = [0.25, 0.5, 0.75] + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = da.quantile(q, interpolation=method) + + expected = da.quantile(q, method=method) + + np.testing.assert_allclose(actual.values, expected.values) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + da.quantile(q, method=method, interpolation=method) + def test_reduce_keep_attrs(self): # Test dropped attrs vm = self.va.mean() @@ -3730,7 +3764,7 @@ def test_polyfit(self, use_dask, use_datetime): da_raw = DataArray( np.stack( - (10 + 1e-15 * x + 2e-28 * x ** 2, 30 + 2e-14 * x + 1e-29 * x ** 2) + (10 + 1e-15 * x + 2e-28 * x**2, 30 + 2e-14 * x + 1e-29 * x**2) ), dims=("d", "x"), coords={"x": xcoord, "d": [0, 1]}, @@ -6126,9 +6160,8 @@ def test_rolling_keep_attrs(funcname, argument): def test_raise_no_warning_for_nan_in_binary_ops(): - with pytest.warns(None) as record: + with assert_no_warnings(): xr.DataArray([1, 2, np.NaN]) > 0 - assert len(record) == 0 @pytest.mark.filterwarnings("error") @@ -6589,25 +6622,50 @@ def test_clip(da): result = da.clip(min=da.mean("x"), max=da.mean("a").isel(x=[0, 1])) -@pytest.mark.parametrize("keep", ["first", "last", False]) -def test_drop_duplicates(keep): - ds = xr.DataArray( - [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" - ) +class TestDropDuplicates: + @pytest.mark.parametrize("keep", ["first", "last", False]) + def test_drop_duplicates_1d(self, keep): + da = xr.DataArray( + [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" + ) - if keep == "first": - data = [0, 6, 7] - time = [0, 1, 2] - elif keep == "last": - data = [5, 6, 7] - time = [0, 1, 2] - else: - data = [6, 7] - time = [1, 2] + if keep == "first": + data = [0, 6, 7] + time = [0, 1, 2] + elif keep == "last": + data = [5, 6, 7] + time = [0, 1, 2] + else: + data = [6, 7] + time = [1, 2] + + expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") + result = da.drop_duplicates("time", keep=keep) + assert_equal(expected, result) + + with pytest.raises(ValueError, match="['space'] not found"): + da.drop_duplicates("space", keep=keep) + + def test_drop_duplicates_2d(self): + da = xr.DataArray( + [[0, 5, 6, 7], [2, 1, 3, 4]], + dims=["space", "time"], + coords={"space": [10, 10], "time": [0, 0, 1, 2]}, + name="test", + ) + + expected = xr.DataArray( + [[0, 6, 7]], + dims=["space", "time"], + coords={"time": ("time", [0, 1, 2]), "space": ("space", [10])}, + name="test", + ) + + result = da.drop_duplicates(["time", "space"], keep="first") + assert_equal(expected, result) - expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test") - result = ds.drop_duplicates("time", keep=keep) - assert_equal(expected, result) + result = da.drop_duplicates(..., keep="first") + assert_equal(expected, result) class TestNumpyCoercion: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c8770601c30..8d6c4f96857 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -38,6 +38,7 @@ assert_array_equal, assert_equal, assert_identical, + 
assert_no_warnings, create_test_data, has_cftime, has_dask, @@ -586,7 +587,7 @@ def test_get_index(self): def test_attr_access(self): ds = Dataset( - {"tmin": ("x", [42], {"units": "Celcius"})}, attrs={"title": "My test data"} + {"tmin": ("x", [42], {"units": "Celsius"})}, attrs={"title": "My test data"} ) assert_identical(ds.tmin, ds["tmin"]) assert_identical(ds.tmin.x, ds.x) @@ -1873,7 +1874,7 @@ def test_reindex_warning(self): # Should not warn ind = xr.DataArray([0.0, 1.0], dims=["dim2"], name="ind") - with pytest.warns(None) as ws: + with warnings.catch_warnings(record=True) as ws: data.reindex(dim2=ind) assert len(ws) == 0 @@ -1883,7 +1884,7 @@ def test_reindex_variables_copied(self): for k in data.variables: assert reindexed_data.variables[k] is not data.variables[k] - def test_reindex_method(self): + def test_reindex_method(self) -> None: ds = Dataset({"x": ("y", [10, 20]), "y": [0, 1]}) y = [-0.5, 0.5, 1.5] actual = ds.reindex(y=y, method="backfill") @@ -1894,6 +1895,14 @@ def test_reindex_method(self): expected = Dataset({"x": ("y", 3 * [np.nan]), "y": y}) assert_identical(expected, actual) + actual = ds.reindex(y=y, method="backfill", tolerance=[0.1, 0.5, 0.1]) + expected = Dataset({"x": ("y", [np.nan, 20, np.nan]), "y": y}) + assert_identical(expected, actual) + + actual = ds.reindex(y=[0.1, 0.1, 1], tolerance=[0, 0.1, 0], method="nearest") + expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": [0.1, 0.1, 1]}) + assert_identical(expected, actual) + actual = ds.reindex(y=y, method="pad") expected = Dataset({"x": ("y", [np.nan, 10, 20]), "y": y}) assert_identical(expected, actual) @@ -3636,7 +3645,7 @@ def test_assign(self): assert list(actual.variables) == ["x", "y"] assert_identical(ds, Dataset()) - actual = actual.assign(y=lambda ds: ds.x ** 2) + actual = actual.assign(y=lambda ds: ds.x**2) expected = Dataset({"y": ("x", [0, 1, 4]), "x": [0, 1, 2]}) assert_identical(actual, expected) @@ -4709,10 +4718,11 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) - @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q, skipna): + def test_quantile(self, q, skipna) -> None: ds = create_test_data(seed=123) + ds.var1.data[0, 0] = np.NaN for dim in [None, "dim1", ["dim1"]]: ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) @@ -4732,7 +4742,7 @@ def test_quantile(self, q, skipna): assert all(d not in ds_quantile.dims for d in dim) @pytest.mark.parametrize("skipna", [True, False]) - def test_quantile_skipna(self, skipna): + def test_quantile_skipna(self, skipna) -> None: q = 0.1 dim = "time" ds = Dataset({"a": ([dim], np.arange(0, 11))}) @@ -4744,6 +4754,34 @@ def test_quantile_skipna(self, skipna): expected = Dataset({"a": value}, coords={"quantile": q}) assert_identical(result, expected) + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_method(self, method) -> None: + + ds = create_test_data(seed=123) + q = [0.25, 0.5, 0.75] + + result = ds.quantile(q, method=method) + + assert_identical(result.var1, ds.var1.quantile(q, method=method)) + assert_identical(result.var2, ds.var2.quantile(q, method=method)) + assert_identical(result.var3, ds.var3.quantile(q, method=method)) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecated(self, method) -> None: + + ds = create_test_data(seed=123) + q = [0.25, 0.5, 0.75] + + with 
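# Sketch of the skipna=None default these quantile tests now exercise: None
# skips NaN for float/complex/object data (np.nanquantile) and falls back to
# np.quantile otherwise, so it matches skipna=True for the float data below.
import numpy as np
import xarray as xr

da = xr.DataArray([1.0, np.nan, 3.0], dims="x")
assert float(da.quantile(0.5)) == 2.0                  # skipna defaults to None
assert np.isnan(float(da.quantile(0.5, skipna=False)))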
warnings.catch_warnings(record=True) as w: + ds.quantile(q, interpolation=method) + + # ensure the warning is only raised once + assert len(w) == 1 + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + ds.quantile(q, method=method, interpolation=method) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) @@ -6129,9 +6167,8 @@ def test_ndrolling_construct(center, fill_value, dask): def test_raise_no_warning_for_nan_in_binary_ops(): - with pytest.warns(None) as record: + with assert_no_warnings(): Dataset(data_vars={"x": ("y", [1, 2, np.NaN])}) > 0 - assert len(record) == 0 @pytest.mark.filterwarnings("error") @@ -6510,6 +6547,37 @@ def test_clip(ds): assert result.dims == ds.dims +class TestDropDuplicates: + @pytest.mark.parametrize("keep", ["first", "last", False]) + def test_drop_duplicates_1d(self, keep): + ds = xr.Dataset( + {"a": ("time", [0, 5, 6, 7]), "b": ("time", [9, 3, 8, 2])}, + coords={"time": [0, 0, 1, 2]}, + ) + + if keep == "first": + a = [0, 6, 7] + b = [9, 8, 2] + time = [0, 1, 2] + elif keep == "last": + a = [5, 6, 7] + b = [3, 8, 2] + time = [0, 1, 2] + else: + a = [6, 7] + b = [8, 2] + time = [1, 2] + + expected = xr.Dataset( + {"a": ("time", a), "b": ("time", b)}, coords={"time": time} + ) + result = ds.drop_duplicates("time", keep=keep) + assert_equal(expected, result) + + with pytest.raises(ValueError, match="['space'] not found"): + ds.drop_duplicates("space", keep=keep) + + class TestNumpyCoercion: def test_from_numpy(self): ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}) diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index 92f39069aa3..773733b7b89 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -1,15 +1,16 @@ """ isort:skip_file """ import pickle +import numpy as np import pytest +from packaging.version import Version dask = pytest.importorskip("dask") # isort:skip distributed = pytest.importorskip("distributed") # isort:skip from dask.distributed import Client, Lock -from distributed.utils_test import cluster, gen_cluster -from distributed.utils_test import loop from distributed.client import futures_of +from distributed.utils_test import cluster, gen_cluster, loop import xarray as xr from xarray.backends.locks import HDF5_LOCK, CombinedLock @@ -23,12 +24,15 @@ from . import ( assert_allclose, + assert_identical, has_h5netcdf, has_netCDF4, requires_rasterio, has_scipy, requires_zarr, requires_cfgrib, + requires_cftime, + requires_netCDF4, ) # this is to stop isort throwing errors. 
May have been easier to just use @@ -105,6 +109,22 @@ def test_dask_distributed_netcdf_roundtrip( assert_allclose(original, computed) +@requires_cftime +@requires_netCDF4 +def test_open_mfdataset_can_open_files_with_cftime_index(tmp_path): + T = xr.cftime_range("20010101", "20010501", calendar="360_day") + Lon = np.arange(100) + data = np.random.random((T.size, Lon.size)) + da = xr.DataArray(data, coords={"time": T, "Lon": Lon}, name="test") + file_path = tmp_path / "test.nc" + da.to_netcdf(file_path) + with cluster() as (s, [a, b]): + with Client(s["address"]): + for parallel in (False, True): + with xr.open_mfdataset(file_path, parallel=parallel) as tf: + assert_identical(tf["test"], da) + + @pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS) def test_dask_distributed_read_netcdf_integration_test( loop, tmp_netcdf_filename, engine, nc_format @@ -160,6 +180,7 @@ def test_dask_distributed_zarr_integration_test(loop, consolidated, compute) -> @requires_rasterio +@pytest.mark.filterwarnings("ignore:deallocating CachingFileManager") def test_dask_distributed_rasterio_integration_test(loop) -> None: with create_tmp_geotiff() as (tmp_file, expected): with cluster() as (s, [a, b]): @@ -184,6 +205,10 @@ def test_dask_distributed_cfgrib_integration_test(loop) -> None: assert_allclose(actual, expected) +@pytest.mark.xfail( + condition=Version(distributed.__version__) < Version("2022.02.0"), + reason="https://github.com/dask/distributed/pull/5739", +) @gen_cluster(client=True) async def test_async(c, s, a, b) -> None: x = create_test_data() @@ -216,6 +241,10 @@ def test_hdf5_lock() -> None: assert isinstance(HDF5_LOCK, dask.utils.SerializableLock) +@pytest.mark.xfail( + condition=Version(distributed.__version__) < Version("2022.02.0"), + reason="https://github.com/dask/distributed/pull/5739", +) @gen_cluster(client=True) async def test_serializable_locks(c, s, a, b) -> None: def f(x, lock=None): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index a85e7d737dc..024edf99510 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1,3 +1,6 @@ +import warnings +from typing import Union + import numpy as np import pandas as pd import pytest @@ -200,6 +203,17 @@ def test_da_groupby_quantile() -> None: actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) + array = xr.DataArray( + data=[np.NaN, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) + + for skipna in (True, False, None): + e = [np.NaN, 5] if skipna is False else [2.5, 5] + + expected = xr.DataArray(data=e, coords={"x": [1, 2], "quantile": 0.5}, dims="x") + actual = array.groupby("x").quantile(0.5, skipna=skipna) + assert_identical(expected, actual) + # Multiple dimensions array = xr.DataArray( data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], @@ -273,6 +287,15 @@ def test_da_groupby_quantile() -> None: ) assert_identical(expected, actual) + # method keyword + array = xr.DataArray(data=[1, 2, 3, 4], coords={"x": [1, 1, 2, 2]}, dims="x") + + expected = xr.DataArray( + data=[1, 3], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) + actual = array.groupby("x").quantile(0.5, method="lower") + assert_identical(expected, actual) + def test_ds_groupby_quantile() -> None: ds = xr.Dataset( @@ -294,6 +317,20 @@ def test_ds_groupby_quantile() -> None: actual = ds.groupby("x").quantile([0, 1]) assert_identical(expected, actual) + ds = xr.Dataset( + data_vars={"a": ("x", [np.NaN, 2, 3, 4, 5, 6])}, + coords={"x": [1, 1, 1, 2, 
2, 2]}, + ) + + for skipna in (True, False, None): + e = [np.NaN, 5] if skipna is False else [2.5, 5] + + expected = xr.Dataset( + data_vars={"a": ("x", e)}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5, skipna=skipna) + assert_identical(expected, actual) + # Multiple dimensions ds = xr.Dataset( data_vars={ @@ -367,6 +404,38 @@ def test_ds_groupby_quantile() -> None: ) assert_identical(expected, actual) + ds = xr.Dataset(data_vars={"a": ("x", [1, 2, 3, 4])}, coords={"x": [1, 1, 2, 2]}) + + # method keyword + expected = xr.Dataset( + data_vars={"a": ("x", [1, 3])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5, method="lower") + assert_identical(expected, actual) + + +@pytest.mark.parametrize("as_dataset", [False, True]) +def test_groupby_quantile_interpolation_deprecated(as_dataset) -> None: + + array = xr.DataArray(data=[1, 2, 3, 4], coords={"x": [1, 1, 2, 2]}, dims="x") + + arr: Union[xr.DataArray, xr.Dataset] + arr = array.to_dataset(name="name") if as_dataset else array + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = arr.quantile(0.5, interpolation="lower") + + expected = arr.quantile(0.5, method="lower") + + assert_identical(actual, expected) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + arr.quantile(0.5, method="lower", interpolation="lower") + def test_da_groupby_assign_coords() -> None: actual = xr.DataArray( @@ -765,7 +834,7 @@ def test_groupby_math_nD_group() -> None: g = da.groupby_bins("num2d", bins=[0, 4, 6]) mean = g.mean() idxr = np.digitize(da.num2d, bins=(0, 4, 6), right=True)[:30, :] - 1 - expanded_mean = mean.drop("num2d_bins").isel(num2d_bins=(("x", "y"), idxr)) + expanded_mean = mean.drop_vars("num2d_bins").isel(num2d_bins=(("x", "y"), idxr)) expected = da.isel(x=slice(30)) - expanded_mean expected["labels"] = expected.labels.broadcast_like(expected.labels2d) expected["num"] = expected.num.broadcast_like(expected.num2d) @@ -1291,7 +1360,7 @@ def test_groupby_bins_sort(self): np.arange(100), dims="x", coords={"x": np.linspace(-100, 100, num=100)} ) binned_mean = data.groupby_bins("x", bins=11).mean() - assert binned_mean.to_index().is_monotonic + assert binned_mean.to_index().is_monotonic_increasing with xr.set_options(use_flox=True): actual = data.groupby_bins("x", bins=11).count() @@ -1472,7 +1541,7 @@ def test_upsample(self): # Pad actual = array.resample(time="3H").pad() - expected = DataArray(array.to_series().resample("3H").pad()) + expected = DataArray(array.to_series().resample("3H").ffill()) assert_identical(expected, actual) # Nearest diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index fd480436889..2a6de0be550 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -30,7 +30,7 @@ def get_example_data(case): data = xr.DataArray( np.sin(x[:, np.newaxis]) * np.cos(y), dims=["x", "y"], - coords={"x": x, "y": y, "x2": ("x", x ** 2)}, + coords={"x": x, "y": y, "x2": ("x", x**2)}, ) if case == 0: @@ -46,7 +46,7 @@ def get_example_data(case): return xr.DataArray( np.sin(x[:, np.newaxis, np.newaxis]) * np.cos(y[:, np.newaxis]) * z, dims=["x", "y", "z"], - coords={"x": x, "y": y, "x2": ("x", x ** 2), "z": z}, + coords={"x": x, "y": y, "x2": ("x", x**2), "z": z}, ) elif case == 4: return get_example_data(3).chunk({"z": 5}) @@ -440,7 +440,7 @@ def test_sorted(): da = xr.DataArray( np.cos(x[:, 
np.newaxis, np.newaxis]) * np.cos(y[:, np.newaxis]) * z, dims=["x", "y", "z"], - coords={"x": x, "y": y, "x2": ("x", x ** 2), "z": z}, + coords={"x": x, "y": y, "x2": ("x", x**2), "z": z}, ) x_new = np.linspace(0, 1, 30) @@ -770,7 +770,7 @@ def test_decompose(method): ], ) def test_interpolate_chunk_1d(method, data_ndim, interp_ndim, nscalar, chunked): - """Interpolate nd array with multiple independant indexers + """Interpolate nd array with multiple independent indexers It should do a series of 1d interpolation """ diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 4121b62a9e8..3721c92317d 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -116,7 +116,7 @@ def test_interpolate_pd_compat(): @pytest.mark.parametrize("method", ["barycentric", "krog", "pchip", "spline", "akima"]) def test_scipy_methods_function(method): # Note: Pandas does some wacky things with these methods and the full - # integration tests wont work. + # integration tests won't work. da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) actual = da.interpolate_na(method=method, dim="time") assert (da.count("time") <= actual.count("time")).all() @@ -255,19 +255,30 @@ def test_interpolate(): assert_equal(actual, expected) -def test_interpolate_nonans(): - - vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) - expected = xr.DataArray(vals, dims="x") - actual = expected.interpolate_na(dim="x") - assert_equal(actual, expected) - - @requires_scipy -def test_interpolate_allnans(): - vals = np.full(6, np.nan, dtype=np.float64) +@pytest.mark.parametrize( + "method,vals", + [ + pytest.param(method, vals, id=f"{desc}:{method}") + for method in [ + "linear", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "polynomial", + ] + for (desc, vals) in [ + ("no nans", np.array([1, 2, 3, 4, 5, 6], dtype=np.float64)), + ("one nan", np.array([1, np.nan, np.nan], dtype=np.float64)), + ("all nans", np.full(6, np.nan, dtype=np.float64)), + ] + ], +) +def test_interp1d_fastrack(method, vals): expected = xr.DataArray(vals, dims="x") - actual = expected.interpolate_na(dim="x") + actual = expected.interpolate_na(dim="x", method=method) assert_equal(actual, expected) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 3088b7e109c..8ded4c6515f 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -526,7 +526,7 @@ def test__infer_interval_breaks_logscale_invalid_coords(self): x = np.linspace(0, 5, 6) with pytest.raises(ValueError): _infer_interval_breaks(x, scale="log") - # Check if error is raised after nagative values in the array + # Check if error is raised after negative values in the array x = np.linspace(-5, 5, 11) with pytest.raises(ValueError): _infer_interval_breaks(x, scale="log") @@ -550,7 +550,7 @@ def test_geo_data(self): [-137.85, -120.99, -103.28, -85.28, -67.62], ] ) - data = np.sqrt(lon ** 2 + lat ** 2) + data = np.sqrt(lon**2 + lat**2) da = DataArray( data, dims=("y", "x"), @@ -1506,7 +1506,7 @@ def test_convenient_facetgrid(self): else: assert "" == ax.get_xlabel() - # Infering labels + # Inferring labels g = self.plotfunc(d, col="z", col_wrap=2) assert_array_equal(g.axes.shape, [2, 2]) for (y, x), ax in np.ndenumerate(g.axes): @@ -1986,7 +1986,7 @@ def test_convenient_facetgrid(self): assert "y" == ax.get_ylabel() assert "x" == ax.get_xlabel() - # Infering labels + # Inferring labels g = self.plotfunc(d, col="z", col_wrap=2) assert_array_equal(g.axes.shape, [2, 2]) for (y, x), ax in 
np.ndenumerate(g.axes): @@ -2886,8 +2886,8 @@ def test_plot_transposes_properly(plotfunc): def test_facetgrid_single_contour(): # regression test for GH3569 x, y = np.meshgrid(np.arange(12), np.arange(12)) - z = xr.DataArray(np.sqrt(x ** 2 + y ** 2)) - z2 = xr.DataArray(np.sqrt(x ** 2 + y ** 2) + 1) + z = xr.DataArray(np.sqrt(x**2 + y**2)) + z2 = xr.DataArray(np.sqrt(x**2 + y**2) + 1) ds = xr.concat([z, z2], dim="time") ds["time"] = [0, 1] diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 4d1eee6363d..218ed1ea2e5 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -1,19 +1,11 @@ -import sys +from importlib.metadata import EntryPoint from unittest import mock import pytest from xarray.backends import common, plugins -if sys.version_info >= (3, 8): - from importlib.metadata import EntryPoint - - importlib_metadata_mock = "importlib.metadata" -else: - # if the fallback library is missing, we are doomed. - from importlib_metadata import EntryPoint - - importlib_metadata_mock = "importlib_metadata" +importlib_metadata_mock = "importlib.metadata" class DummyBackendEntrypointArgs(common.BackendEntrypoint): @@ -91,7 +83,7 @@ def test_backends_dict_from_pkg() -> None: entrypoints = [EntryPoint(name, value, group) for name, value, group in specs] engines = plugins.backends_dict_from_pkg(entrypoints) assert len(engines) == 2 - assert engines.keys() == set(("engine1", "engine2")) + assert engines.keys() == {"engine1", "engine2"} def test_set_missing_parameters() -> None: diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index ad51534ddbf..bf4d39105c4 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -703,8 +703,8 @@ def test_dataset_repr(self): ) assert expected == repr(ds) + @requires_dask def test_sparse_dask_dataset_repr(self): - pytest.importorskip("dask", minversion="2.0") ds = xr.Dataset( data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))} ).chunk() @@ -800,7 +800,7 @@ def test_resample(self): t1 = xr.DataArray( np.linspace(0, 11, num=12), coords=[ - pd.date_range("15/12/1999", periods=12, freq=pd.DateOffset(months=1)) + pd.date_range("1999-12-15", periods=12, freq=pd.DateOffset(months=1)) ], dims="time", ) diff --git a/xarray/tests/test_ufuncs.py b/xarray/tests/test_ufuncs.py index 3379fba44f8..590ae9ae003 100644 --- a/xarray/tests/test_ufuncs.py +++ b/xarray/tests/test_ufuncs.py @@ -8,7 +8,7 @@ from . import assert_array_equal from . import assert_identical as assert_identical_ -from . import mock +from . 
import assert_no_warnings, mock def assert_identical(a, b): @@ -164,9 +164,8 @@ def test_xarray_ufuncs_deprecation(): with pytest.warns(FutureWarning, match="xarray.ufuncs"): xu.cos(xr.DataArray([0, 1])) - with pytest.warns(None) as record: + with assert_no_warnings(): xu.angle(xr.DataArray([0, 1])) - assert len(record) == 0 @pytest.mark.filterwarnings("ignore::RuntimeWarning") diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index faaf3e50a81..37cec1708d1 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2429,10 +2429,7 @@ def test_binary_operations(self, func, dtype): ( pytest.param(operator.lt, id="less_than"), pytest.param(operator.ge, id="greater_equal"), - pytest.param( - operator.eq, - id="equal", - ), + pytest.param(operator.eq, id="equal"), ), ) @pytest.mark.parametrize( @@ -5466,7 +5463,7 @@ def test_grouped_operations(self, func, variant, dtype): def test_content_manipulation(self, func, variant, dtype): variants = { "data": ( - (unit_registry.m ** 3, unit_registry.Pa, unit_registry.degK), + (unit_registry.m**3, unit_registry.Pa, unit_registry.degK), 1, 1, ), diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 3267af8b45b..b8e2f6f4582 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -7,6 +7,7 @@ import pandas as pd import pytest import pytz +from packaging.version import Version from xarray import Coordinate, DataArray, Dataset, IndexVariable, Variable, set_options from xarray.core import dtypes, duck_array_ops, indexing @@ -32,6 +33,7 @@ assert_array_equal, assert_equal, assert_identical, + assert_no_warnings, raise_if_dask_computes, requires_cupy, requires_dask, @@ -232,7 +234,7 @@ def __hash__(self): return hash(self.item) def __repr__(self): - return "{}(item={!r})".format(type(self).__name__, self.item) + return f"{type(self).__name__}(item={self.item!r})" item = HashableItemWrapper((1, 2, 3)) x = self.cls("x", [item]) @@ -343,8 +345,8 @@ def test_1d_math(self): assert_identical(base_v, 0 + v) assert_identical(base_v, v * 1) # binary ops with numpy arrays - assert_array_equal((v * x).values, x ** 2) - assert_array_equal((x * v).values, x ** 2) + assert_array_equal((v * x).values, x**2) + assert_array_equal((x * v).values, x**2) assert_array_equal(v - y, v - 1) assert_array_equal(y - v, 1 - v) # verify attributes are dropped @@ -358,7 +360,7 @@ def test_1d_math(self): assert_array_equal((v * w).values, x * y) # something complicated - assert_array_equal((v ** 2 * w - 1 + x).values, x ** 2 * y - 1 + x) + assert_array_equal((v**2 * w - 1 + x).values, x**2 * y - 1 + x) # make sure dtype is preserved (for Index objects) assert float == (+v).dtype assert float == (+v).values.dtype @@ -1019,7 +1021,7 @@ def test_datetime64_conversion_scalar(self): assert v.values.dtype == np.dtype("datetime64[ns]") def test_timedelta64_conversion_scalar(self): - expected = np.timedelta64(24 * 60 * 60 * 10 ** 9, "ns") + expected = np.timedelta64(24 * 60 * 60 * 10**9, "ns") for values in [ np.timedelta64(1, "D"), pd.Timedelta("1 day"), @@ -1048,7 +1050,7 @@ def test_0d_timedelta(self): for td in [pd.to_timedelta("1s"), np.timedelta64(1, "s")]: v = Variable([], td) assert v.dtype == np.dtype("timedelta64[ns]") - assert v.values == np.timedelta64(10 ** 9, "ns") + assert v.values == np.timedelta64(10**9, "ns") def test_equals_and_identical(self): d = np.random.rand(10, 3) @@ -1698,16 +1700,20 @@ def raise_if_called(*args, **kwargs): with set_options(use_bottleneck=False): v.min() - 
@pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) def test_quantile(self, q, axis, dim, skipna): - v = Variable(["x", "y"], self.d) + + d = self.d.copy() + d[0, 0] = np.NaN + + v = Variable(["x", "y"], d) actual = v.quantile(q, dim=dim, skipna=skipna) - _percentile_func = np.nanpercentile if skipna else np.percentile - expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) + _percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile + expected = _percentile_func(d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) @requires_dask @@ -1720,6 +1726,49 @@ def test_quantile_dask(self, q, axis, dim): expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + @pytest.mark.parametrize( + "use_dask", [pytest.param(True, marks=requires_dask), False] + ) + def test_quantile_method(self, method, use_dask) -> None: + + v = Variable(["x", "y"], self.d) + if use_dask: + v = v.chunk({"x": 2}) + + q = np.array([0.25, 0.5, 0.75]) + actual = v.quantile(q, dim="y", method=method) + + if Version(np.__version__) >= Version("1.22"): + expected = np.nanquantile(self.d, q, axis=1, method=method) # type: ignore[call-arg] + else: + expected = np.nanquantile(self.d, q, axis=1, interpolation=method) # type: ignore[call-arg] + + if use_dask: + assert isinstance(actual.data, dask_array_type) + + np.testing.assert_allclose(actual.values, expected) + + @pytest.mark.parametrize("method", ["midpoint", "lower"]) + def test_quantile_interpolation_deprecation(self, method) -> None: + + v = Variable(["x", "y"], self.d) + q = np.array([0.25, 0.5, 0.75]) + + with pytest.warns( + FutureWarning, + match="`interpolation` argument to quantile was renamed to `method`", + ): + actual = v.quantile(q, dim="y", interpolation=method) + + expected = v.quantile(q, dim="y", method=method) + + np.testing.assert_allclose(actual.values, expected.values) + + with warnings.catch_warnings(record=True): + with pytest.raises(TypeError, match="interpolation and method keywords"): + v.quantile(q, dim="y", interpolation=method, method=method) + @requires_dask def test_quantile_chunked_dim_error(self): v = Variable(["x", "y"], self.d).chunk({"x": 2}) @@ -2161,6 +2210,12 @@ def test_dask_rolling(self, dim, window, center): assert actual.shape == expected.shape assert_equal(actual, expected) + @pytest.mark.xfail( + reason="https://github.com/pydata/xarray/issues/6209#issuecomment-1025116203" + ) + def test_multiindex(self): + super().test_multiindex() + @requires_sparse class TestVariableWithSparse: @@ -2487,9 +2542,8 @@ def __init__(self, array): def test_raise_no_warning_for_nan_in_binary_ops(): - with pytest.warns(None) as record: + with assert_no_warnings(): Variable("x", [1, 2, np.NaN]) > 0 - assert len(record) == 0 class TestBackendIndexing: @@ -2607,10 +2661,14 @@ def test_from_dask(self, Var): @requires_pint def test_from_pint(self, Var): - from pint import Quantity + import pint arr = np.array([1, 2, 3]) - v = Var("x", Quantity(arr, units="m")) + + # IndexVariable strips the unit + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=pint.UnitStrippedWarning) + v = Var("x", pint.Quantity(arr, units="m")) 
assert_identical(v.as_numpy(), Var("x", arr)) np.testing.assert_equal(v.to_numpy(), arr) @@ -2643,11 +2701,15 @@ def test_from_cupy(self, Var): @requires_pint def test_from_pint_wrapping_dask(self, Var): import dask - from pint import Quantity + import pint arr = np.array([1, 2, 3]) d = dask.array.from_array(np.array([1, 2, 3])) - v = Var("x", Quantity(d, units="m")) + + # IndexVariable strips the unit + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=pint.UnitStrippedWarning) + v = Var("x", pint.Quantity(d, units="m")) result = v.as_numpy() assert_identical(result, Var("x", arr)) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 36923ed49c3..1f065228bc4 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -393,7 +393,7 @@ def expected_weighted(da, weights, dim, skipna, operation): return weighted_mean demeaned = da - weighted_mean - sum_of_squares = ((demeaned ** 2) * weights).sum(dim=dim, skipna=skipna) + sum_of_squares = ((demeaned**2) * weights).sum(dim=dim, skipna=skipna) if operation == "sum_of_squares": return sum_of_squares @@ -443,6 +443,7 @@ def check_weighted_operations(data, weights, dim, skipna): @pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None)) @pytest.mark.parametrize("add_nans", (True, False)) @pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.filterwarnings("ignore:invalid value encountered in sqrt") def test_weighted_operations_3D(dim, add_nans, skipna): dims = ("a", "b", "c") @@ -480,6 +481,7 @@ def test_weighted_operations_nonequal_coords(): @pytest.mark.parametrize("shape_weights", ((4,), (4, 4), (4, 4, 4))) @pytest.mark.parametrize("add_nans", (True, False)) @pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.filterwarnings("ignore:invalid value encountered in sqrt") def test_weighted_operations_different_shapes( shape_data, shape_weights, add_nans, skipna ): diff --git a/xarray/tutorial.py b/xarray/tutorial.py index b0a3e110d84..fd8150bf8a6 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -33,8 +33,8 @@ def _construct_cache_dir(path): external_urls = {} # type: dict external_rasterio_urls = { - "RGB.byte": "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/RGB.byte.tif", - "shade": "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/shade.tif", + "RGB.byte": "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/RGB.byte.tif", + "shade": "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/shade.tif", } file_formats = { "air_temperature": 3, @@ -185,7 +185,7 @@ def open_rasterio( References ---------- - .. [1] https://github.com/mapbox/rasterio + .. [1] https://github.com/rasterio/rasterio """ try: import pooch @@ -226,18 +226,27 @@ def load_dataset(*args, **kwargs): return ds.load() -def scatter_example_dataset(): +def scatter_example_dataset(*, seed=None) -> Dataset: + """ + Create an example dataset. + + Parameters + ---------- + seed : int, optional + Seed for the random number generation. 
+ """ + rng = np.random.default_rng(seed) A = DataArray( np.zeros([3, 11, 4, 4]), dims=["x", "y", "z", "w"], - coords=[ - np.arange(3), - np.linspace(0, 1, 11), - np.arange(4), - 0.1 * np.random.randn(4), - ], + coords={ + "x": np.arange(3), + "y": np.linspace(0, 1, 11), + "z": np.arange(4), + "w": 0.1 * rng.standard_normal(4), + }, ) - B = 0.1 * A.x ** 2 + A.y ** 2.5 + 0.1 * A.z * A.w + B = 0.1 * A.x**2 + A.y**2.5 + 0.1 * A.z * A.w A = -0.1 * A.x + A.y / (5 + A.z) + A.w ds = Dataset({"A": A, "B": B}) ds["w"] = ["one", "two", "three", "five"] diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py index 7f6eed55e9b..24907a158ef 100644 --- a/xarray/ufuncs.py +++ b/xarray/ufuncs.py @@ -53,9 +53,7 @@ def __call__(self, *args, **kwargs): new_args = args res = _UNDEFINED if len(args) > 2 or len(args) == 0: - raise TypeError( - "cannot handle {} arguments for {!r}".format(len(args), self._name) - ) + raise TypeError(f"cannot handle {len(args)} arguments for {self._name!r}") elif len(args) == 1: if isinstance(args[0], _xarray_types): res = args[0]._unary_op(self)