diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000000..30c1e18f33c --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: numfocus +custom: http://numfocus.org/donate-to-xarray diff --git a/doc/api.rst b/doc/api.rst index b176e3d5e3f..d2309f28226 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -94,7 +94,7 @@ Dataset contents Dataset.rename_dims Dataset.swap_dims Dataset.expand_dims - Dataset.drop + Dataset.drop_vars Dataset.drop_dims Dataset.set_coords Dataset.reset_coords @@ -118,6 +118,7 @@ Indexing Dataset.loc Dataset.isel Dataset.sel + Dataset.drop_sel Dataset.head Dataset.tail Dataset.thin @@ -154,7 +155,7 @@ Computation .. autosummary:: :toctree: generated/ - Dataset.apply + Dataset.map Dataset.reduce Dataset.groupby Dataset.groupby_bins @@ -263,7 +264,7 @@ DataArray contents DataArray.rename DataArray.swap_dims DataArray.expand_dims - DataArray.drop + DataArray.drop_vars DataArray.reset_coords DataArray.copy @@ -283,6 +284,7 @@ Indexing DataArray.loc DataArray.isel DataArray.sel + DataArray.drop_sel DataArray.head DataArray.tail DataArray.thin @@ -542,10 +544,10 @@ GroupBy objects :toctree: generated/ core.groupby.DataArrayGroupBy - core.groupby.DataArrayGroupBy.apply + core.groupby.DataArrayGroupBy.map core.groupby.DataArrayGroupBy.reduce core.groupby.DatasetGroupBy - core.groupby.DatasetGroupBy.apply + core.groupby.DatasetGroupBy.map core.groupby.DatasetGroupBy.reduce Rolling objects @@ -566,7 +568,7 @@ Resample objects ================ Resample objects also implement the GroupBy interface -(methods like ``apply()``, ``reduce()``, ``mean()``, ``sum()``, etc.). +(methods like ``map()``, ``reduce()``, ``mean()``, ``sum()``, etc.). .. autosummary:: :toctree: generated/ diff --git a/doc/computation.rst b/doc/computation.rst index ae5f4bc5c66..240a1e5704b 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -95,6 +95,9 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification of which values to use as the index in the interpolation. +xarray also provides the ``max_gap`` keyword argument to limit the interpolation to +data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` +for more. Aggregation =========== @@ -183,7 +186,7 @@ a value when aggregating: Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects. -.. _bottleneck: https://github.com/kwgoodman/bottleneck/ +.. _bottleneck: https://github.com/pydata/bottleneck/ We can also manually iterate through ``Rolling`` objects: @@ -462,13 +465,13 @@ Datasets support most of the same methods found on data arrays: abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or -alternatively you can use :py:meth:`~xarray.Dataset.apply` to apply a function +alternatively you can use :py:meth:`~xarray.Dataset.map` to map a function to each variable in a dataset: .. ipython:: python np.sin(ds) - ds.apply(np.sin) + ds.map(np.sin) Datasets also use looping over variables for *broadcasting* in binary arithmetic. You can do arithmetic between any ``DataArray`` and a dataset: diff --git a/doc/conf.py b/doc/conf.py index 7c1557a1e66..0e04f8ccde8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -340,9 +340,10 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "iris": ("http://scitools.org.uk/iris/docs/latest/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "numba": ("https://numba.pydata.org/numba-doc/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), + "iris": ("https://scitools.org.uk/iris/docs/latest", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "numba": ("https://numba.pydata.org/numba-doc/latest", None), + "matplotlib": ("https://matplotlib.org", None), } diff --git a/doc/dask.rst b/doc/dask.rst index 5bdbf779463..11f378aa376 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -292,7 +292,7 @@ For the best performance when using Dask's multi-threaded scheduler, wrap a function that already releases the global interpreter lock, which fortunately already includes most NumPy and Scipy functions. Here we show an example using NumPy operations and a fast function from -`bottleneck <https://github.com/kwgoodman/bottleneck>`__, which +`bottleneck <https://github.com/pydata/bottleneck>`__, which we use to calculate `Spearman's rank-correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`__: .. code-block:: python diff --git a/doc/groupby.rst b/doc/groupby.rst index 52a27f4f160..f5943703765 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -35,10 +35,11 @@ Let's create a simple example dataset: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 3))}, - coords={'x': [10, 20, 30, 40], - 'letters': ('x', list('abba'))}) - arr = ds['foo'] + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 3))}, + coords={"x": [10, 20, 30, 40], "letters": ("x", list("abba"))}, + ) + arr = ds["foo"] ds If we groupby the name of a variable or coordinate in a dataset (we can also @@ -93,7 +94,7 @@ Apply ~~~~~ To apply a function to each group, you can use the flexible -:py:meth:`~xarray.DatasetGroupBy.apply` method. The resulting objects are automatically +:py:meth:`~xarray.DatasetGroupBy.map` method. The resulting objects are automatically concatenated back together along the group axis: .. ipython:: python @@ -101,7 +102,7 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').apply(standardize) + arr.groupby('letters').map(standardize) GroupBy objects also have a :py:meth:`~xarray.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.DatasetGroupBy.mean` as shortcuts for applying an @@ -202,7 +203,7 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen dims=['ny','nx']) da da.groupby('lon').sum(...) - da.groupby('lon').apply(lambda x: x - x.mean(), shortcut=False) + da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 721d1323e73..91644ba2718 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -44,7 +44,7 @@ How do I ...
* - convert a possibly irregularly sampled timeseries to a regularly sampled timeseries - :py:meth:`DataArray.resample`, :py:meth:`Dataset.resample` (see :ref:`resampling` for more) * - apply a function on all data variables in a Dataset - - :py:meth:`Dataset.apply` + - :py:meth:`Dataset.map` * - write xarray objects with complex values to a netCDF file - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True`` * - make xarray objects look like other xarray objects diff --git a/doc/indexing.rst b/doc/indexing.rst index ace960689a8..e8482ac66b3 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -209,12 +209,16 @@ simultaneously, returning a new dataset: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) - ds = da.to_dataset(name='foo') + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) + ds = da.to_dataset(name="foo") ds.isel(space=[0], time=[0]) - ds.sel(time='2000-01-01') + ds.sel(time="2000-01-01") Positional indexing on a dataset is not supported because the ordering of dimensions in a dataset is somewhat ambiguous (it can vary between different @@ -222,7 +226,6 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python - ds[dict(space=[0], time=[0])] ds.loc[dict(time='2000-01-01')] @@ -248,7 +251,6 @@ Any variables with these dimensions are also dropped: ds.drop_dims('time') - .. _masking with where: Masking with ``where`` @@ -326,8 +328,12 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'], - coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']}) + + da = xr.DataArray( + np.arange(12).reshape((3, 4)), + dims=["x", "y"], + coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, + ) da da[[0, 1], [1, 1]] @@ -410,43 +416,56 @@ can use indexing with ``.loc`` : .. ipython:: python - ds = xr.tutorial.open_dataset('air_temperature') + ds = xr.tutorial.open_dataset("air_temperature") - #add an empty 2D dataarray - ds['empty']= xr.full_like(ds.air.mean('time'),fill_value=0) + # add an empty 2D dataarray + ds["empty"] = xr.full_like(ds.air.mean("time"), fill_value=0) - #modify one grid point using loc() - ds['empty'].loc[dict(lon=260, lat=30)] = 100 + # modify one grid point using loc() + ds["empty"].loc[dict(lon=260, lat=30)] = 100 - #modify a 2D region using loc() - lc = ds.coords['lon'] - la = ds.coords['lat'] - ds['empty'].loc[dict(lon=lc[(lc>220)&(lc<260)], lat=la[(la>20)&(la<60)])] = 100 + # modify a 2D region using loc() + lc = ds.coords["lon"] + la = ds.coords["lat"] + ds["empty"].loc[ + dict(lon=lc[(lc > 220) & (lc < 260)], lat=la[(la > 20) & (la < 60)]) + ] = 100 or :py:meth:`~xarray.where`: .. 
ipython:: python - #modify one grid point using xr.where() - ds['empty'] = xr.where((ds.coords['lat']==20)&(ds.coords['lon']==260), 100, ds['empty']) + # modify one grid point using xr.where() + ds["empty"] = xr.where( + (ds.coords["lat"] == 20) & (ds.coords["lon"] == 260), 100, ds["empty"] + ) + + # or modify a 2D region using xr.where() + mask = ( + (ds.coords["lat"] > 20) + & (ds.coords["lat"] < 60) + & (ds.coords["lon"] > 220) + & (ds.coords["lon"] < 260) + ) + ds["empty"] = xr.where(mask, 100, ds["empty"]) - #or modify a 2D region using xr.where() - mask = (ds.coords['lat']>20)&(ds.coords['lat']<60)&(ds.coords['lon']>220)&(ds.coords['lon']<260) - ds['empty'] = xr.where(mask, 100, ds['empty']) Vectorized indexing can also be used to assign values to xarray object. .. ipython:: python - da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'], - coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']}) + da = xr.DataArray( + np.arange(12).reshape((3, 4)), + dims=["x", "y"], + coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, + ) da da[0] = -1 # assignment with broadcasting da - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] = -2 # assign -2 to (ix, iy) = (0, 0) and (1, 1) da @@ -508,10 +527,10 @@ flexible indexing. The following is an example of the pointwise indexing: .. ipython:: python - da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y']) + da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"]) da - da.isel(x=xr.DataArray([0, 1, 6], dims='z'), - y=xr.DataArray([0, 1, 0], dims='z')) + da.isel(x=xr.DataArray([0, 1, 6], dims="z"), y=xr.DataArray([0, 1, 0], dims="z")) + where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected and mapped along a new dimension ``z``. @@ -521,23 +540,27 @@ you can supply a :py:class:`~xarray.DataArray` with a coordinate, .. ipython:: python - da.isel(x=xr.DataArray([0, 1, 6], dims='z', - coords={'z': ['a', 'b', 'c']}), - y=xr.DataArray([0, 1, 0], dims='z')) - + da.isel( + x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}), + y=xr.DataArray([0, 1, 0], dims="z"), + ) + Analogously, label-based pointwise-indexing is also possible by the ``.sel`` method: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) - times = xr.DataArray(pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']), - dims='new_time') - da.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']), - time=times) - + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) + times = xr.DataArray( + pd.to_datetime(["2000-01-03", "2000-01-02", "2000-01-01"]), dims="new_time" + ) + da.sel(space=xr.DataArray(["IA", "IL", "IN"], dims=["new_time"]), time=times) .. _align and reindex: @@ -635,12 +658,16 @@ through the :py:attr:`~xarray.DataArray.indexes` attribute. .. 
ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da da.indexes - da.indexes['time'] + da.indexes["time"] Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension, falling back to a default :py:class:`pandas.RangeIndex` if it has no coordinate @@ -694,32 +721,31 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list('abc'), [0, 1]], - names=('one', 'two')) - mda = xr.DataArray(np.random.rand(6, 3), - [('x', midx), ('y', range(3))]) - mda - mda.sel(x=(list('ab'), [0])) + + midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) + mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) + mda + mda.sel(x=(list("ab"), [0])) You can also select multiple elements by providing a list of labels or tuples or a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[('a', 0), ('b', 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={'one': 'a', 'two': 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. ipython:: python - mda.sel(one='a', two=0) + mda.sel(one='a', two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -731,7 +757,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{'one': 'a'}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/installing.rst b/doc/installing.rst index 0c5e8916ca3..219cf109efe 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -43,7 +43,7 @@ For accelerating xarray - `scipy `__: necessary to enable the interpolation features for xarray objects -- `bottleneck `__: speeds up +- `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor - `numbagg `_: for exponential rolling window operations diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 7d84199323d..741b3d1a5fe 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -142,7 +142,7 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') labels data.groupby(labels).mean('y') - data.groupby(labels).apply(lambda x: x - x.min()) + data.groupby(labels).map(lambda x: x - x.min()) Plotting -------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aa5f35b40f1..cb066d40a2d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,12 +38,26 @@ Breaking changes New Features ~~~~~~~~~~~~ + +- Added the ``fill_value`` option to :py:meth:`~xarray.DataArray.unstack` and + :py:meth:`~xarray.Dataset.unstack` (:issue:`3518`). + By `Keisuke Fujii `_. +- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and + :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data + gap that will be filled by interpolation. By `Deepak Cherian `_. - :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. 
:py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible option for dropping either labels or variables, but using the more specific methods is encouraged. (:pull:`3475`) By `Maximilian Roos `_ +- :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` have been added for + mapping / applying a function over each item in the collection, reflecting the widely used + and least surprising name for this operation. + The existing ``apply`` methods remain for backward compatibility, though using the ``map`` + methods is encouraged. + (:pull:`3459`) + By `Maximilian Roos `_ - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) to represent all 'other' dimensions. For example, to move one dimension to the front, use `.transpose('x', ...)`. (:pull:`3421`) @@ -66,15 +80,25 @@ New Features for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is - invoked. (:issue:`3378`, :pull:`3446`) + invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. - Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ +- Add the documented-but-missing :py:meth:`xarray.core.groupby.DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ +- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when + calling :py:meth:`Dataset.rename` (also :py:meth:`Dataset.rename_dims` + and :py:meth:`xr.Dataset.rename_vars`). By `Mathias Hauser `_ + (:issue:`3522`). +- Fix a bug in `set_index` in case that an existing dimension becomes a level variable of MultiIndex. (:pull:`3520`) + By `Keisuke Fujii `_. +- Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`) + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). @@ -84,9 +108,14 @@ Bug fixes By `Deepak Cherian `_. - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. +- Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). + In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. + By `Deepak Cherian `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ +- Allow appending datetime and bool data variables to zarr stores. + (:issue:`3480`). By `Akihiro Matsukawa `_. Documentation ~~~~~~~~~~~~~ @@ -107,7 +136,8 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Added integration tests against `pint `_. - (:pull:`3238`) by `Justus Magin `_. + (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`) + by `Justus Magin `_. .. 
note:: @@ -126,6 +156,9 @@ Internal Changes - Enable type checking on default sentinel values (:pull:`3472`) By `Maximilian Roos `_ +- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`) + By `Maximilian Roos `_ + .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) @@ -213,6 +246,9 @@ Bug fixes By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_. +- Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is + specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created. + (:pull:`3362`). By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ @@ -3732,7 +3768,7 @@ Breaking changes warnings: methods and attributes that were deprecated in xray v0.3 or earlier (e.g., ``dimensions``, ``attributes```) have gone away. -.. _bottleneck: https://github.com/kwgoodman/bottleneck +.. _bottleneck: https://github.com/pydata/bottleneck Enhancements ~~~~~~~~~~~~ diff --git a/setup.cfg b/setup.cfg index fec2ca6bbe4..21158e3b0ee 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [tool:pytest] python_files=test_*.py testpaths=xarray/tests properties -# Fixed upstream in https://github.com/kwgoodman/bottleneck/pull/199 +# Fixed upstream in https://github.com/pydata/bottleneck/pull/199 filterwarnings = ignore:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning env = diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d23594fc675..945b3937c43 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1234,6 +1234,8 @@ def _validate_datatypes_for_zarr_append(dataset): def check_dtype(var): if ( not np.issubdtype(var.dtype, np.number) + and not np.issubdtype(var.dtype, np.datetime64) + and not np.issubdtype(var.dtype, np.bool) and not coding.strings.is_unicode_dtype(var.dtype) and not var.dtype == object ): @@ -1241,8 +1243,9 @@ def check_dtype(var): raise ValueError( "Invalid dtype for data variable: {} " "dtype must be a subtype of number, " - "a fixed sized string, a fixed size " - "unicode string or an object".format(var) + "datetime, bool, a fixed sized string, " + "a fixed size unicode string or an " + "object".format(var) ) for k in dataset.data_vars.values(): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5f9c8932b6b..2b5f87ab0cd 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,7 +8,6 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type -from ..core.utils import equivalent from ..core.variable import Variable @@ -152,18 +151,25 @@ def encode(self, variable, name=None): fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - if fv is not None and mv is not None and not equivalent(fv, mv): + if ( + fv is not None + and mv is not None + and not duck_array_ops.allclose_or_equiv(fv, mv) + ): raise ValueError( - "Variable {!r} has multiple fill values {}. " - "Cannot encode data. ".format(name, [fv, mv]) + f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." 
) if fv is not None: + # Ensure _FillValue is cast to same dtype as data's + encoding["_FillValue"] = data.dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: + # Ensure missing_value is cast to same dtype as data's + encoding["missing_value"] = data.dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/core/common.py b/xarray/core/common.py index d372115ea57..2afe4b4c3a7 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -43,14 +43,12 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce( - func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, axis, skipna=skipna, **kwargs) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore - return self.reduce(func, dim, axis, allow_lazy=True, **kwargs) + return self.reduce(func, dim, axis, **kwargs) return wrapped_func @@ -83,20 +81,13 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, - dim, - skipna=skipna, - numeric_only=numeric_only, - allow_lazy=True, - **kwargs, + func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore - return self.reduce( - func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs - ) + return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) return wrapped_func diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3e4c7903180..23342fc5e0d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -48,7 +48,7 @@ assert_coordinate_consistent, remap_label_indexers, ) -from .dataset import Dataset, merge_indexes, split_indexes +from .dataset import Dataset, split_indexes from .formatting import format_item from .indexes import Indexes, default_indexes from .merge import PANDAS_TYPES @@ -249,14 +249,14 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ - _accessors: Optional[Dict[str, Any]] # noqa + _cache: Dict[str, Any] _coords: Dict[Any, Variable] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] _variable: Variable __slots__ = ( - "_accessors", + "_cache", "_coords", "_file_obj", "_indexes", @@ -373,7 +373,6 @@ def __init__( assert isinstance(coords, dict) self._coords = coords self._name = name - self._accessors = None # TODO(shoyer): document this argument, once it becomes part of the # public interface. @@ -755,7 +754,9 @@ def reset_coords( return dataset def __dask_tokenize__(self): - return (type(self), self._variable, self._coords, self._name) + from dask.base import normalize_token + + return normalize_token((type(self), self._variable, self._coords, self._name)) def __dask_graph__(self): return self._to_temp_dataset().__dask_graph__() @@ -920,7 +921,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": Coordinates: * x (x) "DataArray": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -1738,6 +1741,7 @@ def unstack( dim : hashable or sequence of hashable, optional Dimension(s) over which to unstack. 
By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -1765,11 +1769,11 @@ def unstack( >>> arr.identical(roundtripped) True - See also + See Also -------- DataArray.stack """ - ds = self._to_temp_dataset().unstack(dim) + ds = self._to_temp_dataset().unstack(dim, fill_value) return self._from_temp_dataset(ds) def to_unstacked_dataset(self, dim, level=0): @@ -1923,6 +1927,11 @@ def drop( """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged + + See Also + -------- + DataArray.drop_vars + DataArray.drop_sel """ ds = self._to_temp_dataset().drop(labels, dim, errors=errors) return self._from_temp_dataset(ds) @@ -2011,44 +2020,69 @@ def fillna(self, value: Any) -> "DataArray": def interpolate_na( self, - dim=None, + dim: Hashable = None, method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "DataArray": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline', 'akima'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline', and `akima`: use their - respective``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a - coordinate variariable to use as the index. + coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit.
When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + <xarray.DataArray (x: 9)> + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - DataArray + interpolated: DataArray + Filled in DataArray. See also -------- @@ -2063,6 +2097,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2cadc90334c..371e0d6bf26 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -204,6 +204,7 @@ def merge_indexes( """ vars_to_replace: Dict[Hashable, Variable] = {} vars_to_remove: List[Hashable] = [] + dims_to_replace: Dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): @@ -244,7 +245,7 @@ def merge_indexes( if not len(names) and len(var_names) == 1: idx = pd.Index(variables[var_names[0]].values) - else: + else: # MultiIndex for n in var_names: try: var = variables[n] @@ -256,15 +257,22 @@ def merge_indexes( levels.append(cat.categories) idx = pd.MultiIndex(levels, codes, names=names) + for n in names: + dims_to_replace[n] = dim vars_to_replace[dim] = IndexVariable(dim, idx) vars_to_remove.extend(var_names) new_variables = {k: v for k, v in variables.items() if k not in vars_to_remove} new_variables.update(vars_to_replace) + + # update dimensions if necessary GH: 3512 + for k, v in new_variables.items(): + if any(d in dims_to_replace for d in v.dims): + new_dims = [dims_to_replace.get(d, d) for d in v.dims] + new_variables[k] = v._replace(dims=new_dims) new_coord_names = coord_names | set(vars_to_replace) new_coord_names -= set(vars_to_remove) - return new_variables, new_coord_names @@ -411,8 +419,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): coordinates used for label based indexing.
""" - _accessors: Optional[Dict[str, Any]] _attrs: Optional[Dict[Hashable, Any]] + _cache: Dict[str, Any] _coord_names: Set[Hashable] _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] @@ -420,8 +428,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _variables: Dict[Hashable, Variable] __slots__ = ( - "_accessors", "_attrs", + "_cache", "_coord_names", "_dims", "_encoding", @@ -527,7 +535,6 @@ def __init__( data_vars, coords, compat=compat ) - self._accessors = None self._attrs = dict(attrs) if attrs is not None else None self._file_obj = None self._encoding = None @@ -652,7 +659,11 @@ def load(self, **kwargs) -> "Dataset": return self def __dask_tokenize__(self): - return (type(self), self._variables, self._coord_names, self._attrs) + from dask.base import normalize_token + + return normalize_token( + (type(self), self._variables, self._coord_names, self._attrs) + ) def __dask_graph__(self): graphs = {k: v.__dask_graph__() for k, v in self.variables.items()} @@ -858,7 +869,6 @@ def _construct_direct( obj._attrs = attrs obj._file_obj = file_obj obj._encoding = encoding - obj._accessors = None return obj @classmethod @@ -2655,7 +2665,7 @@ def _rename_indexes(self, name_dict, dims_set): verify_integrity=False, ) else: - index = pd.Index(v, name=new_name) + index = v.rename(new_name) indexes[new_name] = index return indexes @@ -3323,7 +3333,7 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": index = self.get_index(dim) index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3332,7 +3342,7 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": if index.equals(full_idx): obj = self else: - obj = self.reindex({dim: full_idx}, copy=False) + obj = self.reindex({dim: full_idx}, copy=False, fill_value=fill_value) new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] @@ -3358,7 +3368,11 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": variables, coord_names=coord_names, indexes=indexes ) - def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": + def unstack( + self, + dim: Union[Hashable, Iterable[Hashable]] = None, + fill_value: Any = dtypes.NA, + ) -> "Dataset": """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -3370,6 +3384,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": dim : Hashable or iterable of Hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. 
By default, np.nan Returns ------- @@ -3407,7 +3422,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim) + result = result._unstack_once(dim, fill_value) return result def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset": @@ -3557,6 +3572,11 @@ def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged + + See Also + -------- + Dataset.drop_vars + Dataset.drop_sel """ if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') @@ -3891,42 +3911,65 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "Dataset": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- - dim : Hashable + dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline': use their respective - ``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + equally-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a - coordinate variariable to use as the index. + coordinate variable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. - kwargs : any - parameters passed verbatim to the underlying interplation function + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit.
When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. + For example, consider:: + + <xarray.DataArray (x: 9)> + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - Dataset + interpolated: Dataset + Filled in Dataset. See also -------- @@ -3942,6 +3985,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) return new @@ -4022,7 +4066,7 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, - allow_lazy: bool = False, + allow_lazy: bool = None, **kwargs: Any, ) -> "Dataset": """Reduce this dataset by applying `func` along some dimension(s). @@ -4108,14 +4152,14 @@ def reduce( variables, coord_names=coord_names, attrs=attrs, indexes=indexes ) - def apply( + def map( self, func: Callable, keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, ) -> "Dataset": - """Apply a function over the data variables in this dataset. + """Apply a function to each variable in this dataset. Parameters ---------- @@ -4135,7 +4179,7 @@ def apply( Returns ------- applied : Dataset - Resulting dataset from applying ``func`` over each data variable. + Resulting dataset from applying ``func`` to each data variable. Examples -------- >>> da = xr.DataArray(np.random.randn(2, 3)) >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}) >>> ds <xarray.Dataset> Dimensions: (dim_0: 2, dim_1: 3, x: 2) Dimensions without coordinates: dim_0, dim_1, x Data variables: foo (dim_0, dim_1) float64 -0.3751 -1.951 -1.945 0.2948 0.711 -0.3948 bar (x) int64 -1 2 - >>> ds.apply(np.fabs) + >>> ds.map(np.fabs) <xarray.Dataset> Dimensions: (dim_0: 2, dim_1: 3, x: 2) Dimensions without coordinates: dim_0, dim_1, x Data variables: foo (dim_0, dim_1) float64 0.3751 1.951 1.945 0.2948 0.711 0.3948 bar (x) int64 1 2 @@ -4165,6 +4209,27 @@ def apply( attrs = self.attrs if keep_attrs else None return type(self)(variables, attrs=attrs) + def apply( + self, + func: Callable, + keep_attrs: bool = None, + args: Iterable[Any] = (), + **kwargs: Any, + ) -> "Dataset": + """ + Backward compatible implementation of ``map`` + + See Also + -------- + Dataset.map + """ + warnings.warn( + "Dataset.apply may be deprecated in the future. Using Dataset.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, keep_attrs, args, **kwargs) + def assign( self, variables: Mapping[Hashable, Any] = None, **variables_kwargs: Hashable ) -> "Dataset": diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index f473eaa497d..79abbccea39 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -20,10 +20,15 @@ def __get__(self, obj, cls): # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor + # Use the same dict as @pandas.util.cache_readonly. + # It must be explicitly declared in obj.__slots__.
try: - return obj._accessors[self._name] - except TypeError: - obj._accessors = {} + cache = obj._cache + except AttributeError: + cache = obj._cache = {} + + try: + return cache[self._name] except KeyError: pass @@ -35,7 +40,7 @@ def __get__(self, obj, cls): # something else (GH933): raise RuntimeError("error initializing %r accessor." % self._name) - obj._accessors[self._name] = accessor_obj + cache[self._name] = accessor_obj return accessor_obj diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c8906e34737..38ecc04534a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -557,6 +557,59 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + """Compute the qth quantile over each array in the groups and + concatenate them together into a new array. + + Parameters + ---------- + q : float in range of [0,1] (or sequence of floats) + Quantile to compute, which must be between 0 and 1 + inclusive. + dim : `...`, str or sequence of str, optional + Dimension(s) over which to apply quantile. + Defaults to the grouped dimension. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + + Returns + ------- + quantiles : Variable + If `q` is a single quantile, then the result is a + scalar. If multiple percentiles are given, first axis of + the result corresponds to the quantile. In either case a + quantile dimension is added to the return array. The other + dimensions are the dimensions that remain after the + reduction of the array. + + See Also + -------- + numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, + DataArray.quantile + """ + if dim is None: + dim = self._group_dim + + out = self.map( + self._obj.__class__.quantile, + shortcut=False, + q=q, + dim=dim, + interpolation=interpolation, + keep_attrs=keep_attrs, + ) + + return out + def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. 
@@ -585,9 +638,7 @@ def _first_or_last(self, op, skipna, keep_attrs): return self._obj if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) - return self.reduce( - op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True - ) + return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs) def first(self, skipna=None, keep_attrs=None): """Return the first element of each group along the group dimension @@ -608,7 +659,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Dataset.swap_dims """ coords_kwargs = either_dict_or_kwargs(coords, coords_kwargs, "assign_coords") - return self.apply(lambda ds: ds.assign_coords(**coords_kwargs)) + return self.map(lambda ds: ds.assign_coords(**coords_kwargs)) def _maybe_reorder(xarray_obj, dim, positions): @@ -655,8 +706,8 @@ def lookup_order(dimension): new_order = sorted(stacked.dims, key=lookup_order) return stacked.transpose(*new_order, transpose_coords=self._restore_coord_dims) - def apply(self, func, shortcut=False, args=(), **kwargs): - """Apply a function over each array in the group and concatenate them + def map(self, func, shortcut=False, args=(), **kwargs): + """Apply a function to each array in the group and concatenate them together into a new array. `func` is called like `func(ar, *args, **kwargs)` for each array `ar` @@ -702,6 +753,21 @@ def apply(self, func, shortcut=False, args=(), **kwargs): applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped) return self._combine(applied, shortcut=shortcut) + def apply(self, func, shortcut=False, args=(), **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DataArrayGroupBy.map + """ + warnings.warn( + "GroupBy.apply may be deprecated in the future. Using GroupBy.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, shortcut=shortcut, args=args, **kwargs) + def _combine(self, applied, restore_coord_dims=False, shortcut=False): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) @@ -724,60 +790,6 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): combined = self._maybe_unstack(combined) return combined - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): - """Compute the qth quantile over each array in the groups and - concatenate them together into a new array. - - Parameters - ---------- - q : float in range of [0,1] (or sequence of floats) - Quantile to compute, which must be between 0 and 1 - inclusive. - dim : `...`, str or sequence of str, optional - Dimension(s) over which to apply quantile. - Defaults to the grouped dimension. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. - - Returns - ------- - quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. 
- - See Also - -------- - numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, - DataArray.quantile - """ - if dim is None: - dim = self._group_dim - - out = self.apply( - self._obj.__class__.quantile, - shortcut=False, - q=q, - dim=dim, - interpolation=interpolation, - keep_attrs=keep_attrs, - ) - - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") - return out - def reduce( self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs ): @@ -820,7 +832,7 @@ def reduce_array(ar): check_reduce_dims(dim, self.dims) - return self.apply(reduce_array, shortcut=shortcut) + return self.map(reduce_array, shortcut=shortcut) ops.inject_reduce_methods(DataArrayGroupBy) @@ -828,8 +840,8 @@ def reduce_array(ar): class DatasetGroupBy(GroupBy, ImplementsDatasetReduce): - def apply(self, func, args=(), shortcut=None, **kwargs): - """Apply a function over each Dataset in the group and concatenate them + def map(self, func, args=(), shortcut=None, **kwargs): + """Apply a function to each Dataset in the group and concatenate them together into a new Dataset. `func` is called like `func(ds, *args, **kwargs)` for each dataset `ds` @@ -862,6 +874,22 @@ def apply(self, func, args=(), shortcut=None, **kwargs): applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped()) return self._combine(applied) + def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DatasetGroupBy.map + """ + + warnings.warn( + "GroupBy.apply may be deprecated in the future. Using GroupBy.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func, shortcut=shortcut, args=args, **kwargs) + def _combine(self, applied): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) @@ -914,7 +942,7 @@ def reduce_dataset(ds): check_reduce_dims(dim, self.dims) - return self.apply(reduce_dataset) + return self.map(reduce_dataset) def assign(self, **kwargs): """Assign data variables by group. @@ -923,7 +951,7 @@ def assign(self, **kwargs): -------- Dataset.assign """ - return self.apply(lambda ds: ds.assign(**kwargs)) + return self.map(lambda ds: ds.assign(**kwargs)) ops.inject_reduce_methods(DatasetGroupBy) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 77dde66484e..117fcaf8f81 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,18 +1,46 @@ import warnings from functools import partial -from typing import Any, Callable, Dict, Sequence +from numbers import Number +from typing import Any, Callable, Dict, Hashable, Sequence, Union import numpy as np import pandas as pd from . import utils -from .common import _contains_datetime_like_objects +from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import dask_array_type from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables +def _get_nan_block_lengths(obj, dim: Hashable, index: Variable): + """ + Return an object where each NaN element in 'obj' is replaced by the + length of the gap the element is in. 
+ """ + + # make variable so that we get broadcasting for free + index = Variable([dim], index) + + # algorithm from https://github.com/pydata/xarray/pull/3302#discussion_r324707072 + arange = ones_like(obj) * index + valid = obj.notnull() + valid_arange = arange.where(valid) + cumulative_nans = valid_arange.ffill(dim=dim).fillna(index[0]) + + nan_block_lengths = ( + cumulative_nans.diff(dim=dim, label="upper") + .reindex({dim: obj[dim]}) + .where(valid) + .bfill(dim=dim) + .where(~valid, 0) + .fillna(index[-1] - valid_arange.max()) + ) + + return nan_block_lengths + + class BaseInterpolator: """Generic interpolator class for normalizing interpolation methods """ @@ -178,7 +206,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, use_coordinate=True): +def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): """get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -195,23 +223,33 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index = arr.coords[use_coordinate] if index.ndim != 1: raise ValueError( - "Coordinates used for interpolation must be 1D, " - "%s is %dD." % (use_coordinate, index.ndim) + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." ) + index = index.to_index() + + # TODO: index.name is None for multiindexes + # set name for nice error messages below + if isinstance(index, pd.MultiIndex): + index.name = dim + + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") + + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") # raise if index cannot be cast to a float (e.g. MultiIndex) try: index = index.values.astype(np.float64) except (TypeError, ValueError): # pandas raises a TypeError - # xarray/nuppy raise a ValueError + # xarray/numpy raise a ValueError raise TypeError( - "Index must be castable to float64 to support" - "interpolation, got: %s" % type(index) + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." ) - # check index sorting now so we can skip it later - if not (np.diff(index) > 0).all(): - raise ValueError("Index must be monotonicly increasing") + else: axis = arr.get_axis_num(dim) index = np.arange(arr.shape[axis], dtype=np.float64) @@ -220,7 +258,13 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): def interp_na( - self, dim=None, use_coordinate=True, method="linear", limit=None, **kwargs + self, + dim: Hashable = None, + use_coordinate: Union[bool, str] = True, + method: str = "linear", + limit: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + **kwargs, ): """Interpolate values according to different methods. """ @@ -230,6 +274,40 @@ def interp_na( if limit is not None: valids = _get_valid_fill_mask(self, dim, limit) + if max_gap is not None: + max_type = type(max_gap).__name__ + if not is_scalar(max_gap): + raise ValueError("max_gap must be a scalar.") + + if ( + dim in self.indexes + and isinstance(self.indexes[dim], pd.DatetimeIndex) + and use_coordinate + ): + if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): + raise TypeError( + f"Underlying index is DatetimeIndex. 
Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" + ) + + if isinstance(max_gap, str): + try: + max_gap = pd.to_timedelta(max_gap) + except ValueError: + raise ValueError( + f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" + ) + + if isinstance(max_gap, pd.Timedelta): + max_gap = np.timedelta64(max_gap.value, "ns") + + max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + + if not use_coordinate: + if not isinstance(max_gap, (Number, np.number)): + raise TypeError( + f"Expected integer or floating point max_gap since use_coordinate=False. Received {max_type}." + ) + # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) @@ -253,6 +331,14 @@ def interp_na( if limit is not None: arr = arr.where(valids) + if max_gap is not None: + if dim not in self.coords: + raise NotImplementedError( + "max_gap not implemented for unlabeled coordinates yet." + ) + nan_block_lengths = _get_nan_block_lengths(self, dim, index) + arr = arr.where(nan_block_lengths <= max_gap) + return arr diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 2cb1bd55e19..fb388490d06 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,3 +1,5 @@ +import warnings + from . import ops from .groupby import DataArrayGroupBy, DatasetGroupBy @@ -173,8 +175,8 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) - def apply(self, func, shortcut=False, args=(), **kwargs): - """Apply a function over each array in the group and concatenate them + def map(self, func, shortcut=False, args=(), **kwargs): + """Apply a function to each array in the group and concatenate them together into a new array. `func` is called like `func(ar, *args, **kwargs)` for each array `ar` @@ -212,7 +214,9 @@ def apply(self, func, shortcut=False, args=(), **kwargs): applied : DataArray or DataArray The result of splitting, applying and combining this array. """ - combined = super().apply(func, shortcut=shortcut, args=args, **kwargs) + # TODO: the argument order for Resample doesn't match that for its parent, + # GroupBy + combined = super().map(func, shortcut=shortcut, args=args, **kwargs) # If the aggregation function didn't drop the original resampling # dimension, then we need to do so before we can rename the proxy @@ -225,6 +229,21 @@ def apply(self, func, shortcut=False, args=(), **kwargs): return combined + def apply(self, func, args=(), shortcut=None, **kwargs): + """ + Backward compatible implementation of ``map`` + + See Also + -------- + DataArrayResample.map + """ + warnings.warn( + "Resample.apply may be deprecated in the future. Using Resample.map is encouraged", + PendingDeprecationWarning, + stacklevel=2, + ) + return self.map(func=func, shortcut=shortcut, args=args, **kwargs) + ops.inject_reduce_methods(DataArrayResample) ops.inject_binary_ops(DataArrayResample) @@ -247,7 +266,7 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) - def apply(self, func, args=(), shortcut=None, **kwargs): + def map(self, func, args=(), shortcut=None, **kwargs): """Apply a function over each Dataset in the groups generated for resampling and concatenate them together into a new Dataset. 
@@ -282,6 +301,22 @@ def apply(self, func, args=(), shortcut=None, **kwargs):
 
         return combined.rename({self._resample_dim: self._dim})
 
+    def apply(self, func, args=(), shortcut=None, **kwargs):
+        """
+        Backward compatible implementation of ``map``
+
+        See Also
+        --------
+        DatasetResample.map
+        """
+
+        warnings.warn(
+            "Resample.apply may be deprecated in the future. Using Resample.map is encouraged",
+            PendingDeprecationWarning,
+            stacklevel=2,
+        )
+        return self.map(func=func, shortcut=shortcut, args=args, **kwargs)
+
     def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
         """Reduce the items in this group by applying `func` along the
         pre-defined resampling dimension.
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index 90edd9a3e2c..ea6d72b2e03 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -1,4 +1,5 @@
 import functools
+import warnings
 from typing import Any, Callable, Dict
 
 import numpy as np
@@ -351,6 +352,14 @@ def _bottleneck_reduce(self, func, **kwargs):
     def _numpy_or_bottleneck_reduce(
         self, array_agg_func, bottleneck_move_func, **kwargs
     ):
+        if "dim" in kwargs:
+            warnings.warn(
+                f"Reductions will be applied along the rolling dimension '{self.dim}'. Passing the 'dim' kwarg to reduction operations has no effect and will raise an error in xarray 0.16.0.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+            del kwargs["dim"]
+
         if bottleneck_move_func is not None and not isinstance(
             self.obj.data, dask_array_type
         ):
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 43b3df1dcd3..348d16e8521 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1,5 +1,7 @@
+import copy
 import functools
 import itertools
+import warnings
 from collections import defaultdict
 from datetime import timedelta
 from distutils.version import LooseVersion
@@ -23,10 +25,11 @@
 from .pycompat import dask_array_type, integer_types
 from .utils import (
     OrderedSet,
+    _default,
     decode_numpy_dict_values,
     either_dict_or_kwargs,
-    infix_dims,
     ensure_us_time_resolution,
+    infix_dims,
 )
 
 try:
@@ -393,7 +396,9 @@ def compute(self, **kwargs):
     def __dask_tokenize__(self):
         # Use v.data, instead of v._data, in order to cope with the wrappers
         # around NetCDF and the like
-        return type(self), self._dims, self.data, self._attrs
+        from dask.base import normalize_token
+
+        return normalize_token((type(self), self._dims, self.data, self._attrs))
 
     def __dask_graph__(self):
         if isinstance(self._data, dask_array_type):
@@ -884,7 +889,20 @@ def copy(self, deep=True, data=None):
         # note:
         # dims is already an immutable tuple
         # attributes and encoding will be copied when the new Array is created
-        return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True)
+        return self._replace(data=data)
+
+    def _replace(
+        self, dims=_default, data=_default, attrs=_default, encoding=_default
+    ) -> "Variable":
+        if dims is _default:
+            dims = copy.copy(self._dims)
+        if data is _default:
+            data = copy.copy(self.data)
+        if attrs is _default:
+            attrs = copy.copy(self._attrs)
+        if encoding is _default:
+            encoding = copy.copy(self._encoding)
+        return type(self)(dims, data, attrs, encoding, fastpath=True)
 
     def __copy__(self):
         return self.copy(deep=False)
@@ -1425,7 +1443,7 @@ def reduce(
         axis=None,
         keep_attrs=None,
         keepdims=False,
-        allow_lazy=False,
+        allow_lazy=None,
         **kwargs,
     ):
         """Reduce this array by applying `func` along some dimension(s).
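With ``allow_lazy`` now defaulting to ``None`` (interpreted as ``True``), reductions on dask-backed variables stay lazy; the hunk below adds the deprecation warning for callers that still pass the argument explicitly. A rough sketch of the resulting behaviour, mirroring the ``v.reduce(duck_array_ops.mean)`` check added to test_dask.py further down (``duck_array_ops`` is an internal module, used here only because the new test does the same):

```python
import dask.array
import xarray as xr
from xarray.core import duck_array_ops

v = xr.Variable(("x",), dask.array.ones(10, chunks=5))

# allow_lazy defaults to True: the reduction is applied to the underlying
# dask array and nothing is computed at this point.
lazy_mean = v.reduce(duck_array_ops.mean)

# Passing allow_lazy explicitly still works but now emits a
# DeprecationWarning; allow_lazy=False forces eager computation via .values.
eager_mean = v.reduce(duck_array_ops.mean, allow_lazy=False)
```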
@@ -1466,7 +1484,17 @@ def reduce( if dim is not None: axis = self.get_axis_num(dim) + + if allow_lazy is not None: + warnings.warn( + "allow_lazy is deprecated and will be removed in version 0.16.0. It is now True by default.", + DeprecationWarning, + ) + else: + allow_lazy = True + input_data = self.data if allow_lazy else self.values + if axis is not None: data = func(input_data, axis=axis, **kwargs) else: @@ -1973,8 +2001,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): self._data = PandasIndexAdapter(self._data) def __dask_tokenize__(self): + from dask.base import normalize_token + # Don't waste time converting pd.Index to np.ndarray - return (type(self), self._dims, self._data.array, self._attrs) + return normalize_token((type(self), self._dims, self._data.array, self._attrs)) def load(self): # data is already loaded into memory for IndexVariable diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index ca68f617144..5c754c3f49b 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -288,7 +288,7 @@ def line( ) # The allargs dict passed to _easy_facetgrid above contains args - if args is (): + if args == (): args = kwargs.pop("args", ()) else: assert "args" not in kwargs diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6cd584daa96..3e0474e7b60 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_encode_missing_fill_values_conflict(): + original = xr.Variable( + ("x",), + [0.0, -1.0, 1.0], + encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)}, + ) + coder = variables.CFMaskCoder() + encoded = coder.encode(original) + + assert encoded.dtype == encoded.attrs["missing_value"].dtype + assert encoded.dtype == encoded.attrs["_FillValue"].dtype + + with pytest.warns(variables.SerializationWarning): + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) + + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]), diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index fa8ae9991d7..4c1f317342f 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -12,6 +12,7 @@ import xarray as xr import xarray.ufuncs as xu from xarray import DataArray, Dataset, Variable +from xarray.core import duck_array_ops from xarray.testing import assert_chunks_equal from xarray.tests import mock @@ -217,6 +218,8 @@ def test_reduce(self): self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) with raises_regex(NotImplementedError, "dask"): v.median() + with raise_if_dask_computes(): + v.reduce(duck_array_ops.mean) def test_missing_values(self): values = np.array([0, 1, np.nan, 3]) @@ -488,7 +491,17 @@ def test_groupby(self): v = self.lazy_array expected = u.groupby("x").mean(...) - actual = v.groupby("x").mean(...) + with raise_if_dask_computes(): + actual = v.groupby("x").mean(...) 
+ self.assertLazyAndAllClose(expected, actual) + + def test_rolling(self): + u = self.eager_array + v = self.lazy_array + + expected = u.rolling(x=2).mean() + with raise_if_dask_computes(): + actual = v.rolling(x=2).mean() self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): @@ -500,7 +513,8 @@ def test_groupby_first(self): with raises_regex(NotImplementedError, "dask"): v.groupby("ab").first() expected = u.groupby("ab").first() - actual = v.groupby("ab").first(skipna=False) + with raise_if_dask_computes(): + actual = v.groupby("ab").first(skipna=False) self.assertLazyAndAllClose(expected, actual) def test_reindex(self): @@ -1283,6 +1297,32 @@ def test_token_identical(obj, transform): ) +def test_recursive_token(): + """Test that tokenization is invoked recursively, and doesn't just rely on the + output of str() + """ + a = np.ones(10000) + b = np.ones(10000) + b[5000] = 2 + assert str(a) == str(b) + assert dask.base.tokenize(a) != dask.base.tokenize(b) + + # Test DataArray and Variable + da_a = DataArray(a) + da_b = DataArray(b) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + # Test Dataset + ds_a = da_a.to_dataset(name="x") + ds_b = da_b.to_dataset(name="x") + assert dask.base.tokenize(ds_a) != dask.base.tokenize(ds_b) + + # Test IndexVariable + da_a = DataArray(a, dims=["x"], coords={"x": a}) + da_b = DataArray(a, dims=["x"], coords={"x": b}) + assert dask.base.tokenize(da_a) != dask.base.tokenize(da_b) + + @requires_scipy_or_netCDF4 def test_normalize_token_with_backend(map_ds): with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index acfe684d220..4c3553c867e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1182,6 +1182,16 @@ def test_selection_multiindex_remove_unused(self): expected = expected.set_index(xy=["x", "y"]).unstack() assert_identical(expected, actual) + def test_selection_multiindex_from_level(self): + # GH: 3512 + da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) + db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) + data = xr.concat([da, db], dim="x").set_index(xy=["x", "y"]) + assert data.dims == ("xy",) + actual = data.sel(y="a") + expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y").drop("y") + assert_equal(actual, expected) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") @@ -2417,7 +2427,7 @@ def test_groupby_properties(self): assert_array_equal(expected_groups[key], grouped.groups[key]) assert 3 == len(grouped) - def test_groupby_apply_identity(self): + def test_groupby_map_identity(self): expected = self.make_groupby_example_array() idx = expected.coords["y"] @@ -2428,7 +2438,7 @@ def identity(x): for shortcut in [False, True]: for squeeze in [False, True]: grouped = expected.groupby(g, squeeze=squeeze) - actual = grouped.apply(identity, shortcut=shortcut) + actual = grouped.map(identity, shortcut=shortcut) assert_identical(expected, actual) def test_groupby_sum(self): @@ -2461,7 +2471,7 @@ def test_groupby_sum(self): [["a", "b", "c"]], ["abc"], ) - actual = array["y"].groupby("abc").apply(np.sum) + actual = array["y"].groupby("abc").map(np.sum) assert_allclose(expected, actual) actual = array["y"].groupby("abc").sum(...) 
assert_allclose(expected, actual) @@ -2532,7 +2542,7 @@ def test_groupby_reduce_attrs(self): expected.attrs["foo"] = "bar" assert_identical(expected, actual) - def test_groupby_apply_center(self): + def test_groupby_map_center(self): def center(x): return x - np.mean(x) @@ -2545,16 +2555,16 @@ def center(x): ) expected_ds["foo"] = (["x", "y"], exp_data) expected_centered = expected_ds["foo"] - assert_allclose(expected_centered, grouped.apply(center)) + assert_allclose(expected_centered, grouped.map(center)) - def test_groupby_apply_ndarray(self): + def test_groupby_map_ndarray(self): # regression test for #326 array = self.make_groupby_example_array() grouped = array.groupby("abc") - actual = grouped.apply(np.asarray) + actual = grouped.map(np.asarray) assert_equal(array, actual) - def test_groupby_apply_changes_metadata(self): + def test_groupby_map_changes_metadata(self): def change_metadata(x): x.coords["x"] = x.coords["x"] * 2 x.attrs["fruit"] = "lemon" @@ -2562,7 +2572,7 @@ def change_metadata(x): array = self.make_groupby_example_array() grouped = array.groupby("abc") - actual = grouped.apply(change_metadata) + actual = grouped.map(change_metadata) expected = array.copy() expected = change_metadata(expected) assert_equal(expected, actual) @@ -2631,7 +2641,7 @@ def test_groupby_restore_dim_order(self): ("a", ("a", "y")), ("b", ("x", "b")), ]: - result = array.groupby(by).apply(lambda x: x.squeeze()) + result = array.groupby(by).map(lambda x: x.squeeze()) assert result.dims == expected_dims def test_groupby_restore_coord_dims(self): @@ -2651,13 +2661,13 @@ def test_groupby_restore_coord_dims(self): ("a", ("a", "y")), ("b", ("x", "b")), ]: - result = array.groupby(by, restore_coord_dims=True).apply( + result = array.groupby(by, restore_coord_dims=True).map( lambda x: x.squeeze() )["c"] assert result.dims == expected_dims with pytest.warns(FutureWarning): - array.groupby("x").apply(lambda x: x.squeeze()) + array.groupby("x").map(lambda x: x.squeeze()) def test_groupby_first_and_last(self): array = DataArray([1, 2, 3, 4, 5], dims="x") @@ -2699,9 +2709,9 @@ def test_groupby_multidim(self): actual_sum = array.groupby(dim).sum(...) assert_identical(expected_sum, actual_sum) - def test_groupby_multidim_apply(self): + def test_groupby_multidim_map(self): array = self.make_groupby_multidim_example_array() - actual = array.groupby("lon").apply(lambda x: x - x.mean()) + actual = array.groupby("lon").map(lambda x: x - x.mean()) expected = DataArray( [[[-2.5, -6.0], [-5.0, -8.5]], [[2.5, 3.0], [8.0, 8.5]]], coords=array.coords, @@ -2722,7 +2732,7 @@ def test_groupby_bins(self): ) # the problem with this is that it overwrites the dimensions of array! 
# actual = array.groupby('dim_0', bins=bins).sum() - actual = array.groupby_bins("dim_0", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("dim_0", bins).map(lambda x: x.sum()) assert_identical(expected, actual) # make sure original array dims are unchanged assert len(array.dim_0) == 4 @@ -2744,12 +2754,12 @@ def test_groupby_bins_multidim(self): bins = [0, 15, 20] bin_coords = pd.cut(array["lat"].values.flat, bins).categories expected = DataArray([16, 40], dims="lat_bins", coords={"lat_bins": bin_coords}) - actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("lat", bins).map(lambda x: x.sum()) assert_identical(expected, actual) # modify the array coordinates to be non-monotonic after unstacking array["lat"].data = np.array([[10.0, 20.0], [20.0, 10.0]]) expected = DataArray([28, 28], dims="lat_bins", coords={"lat_bins": bin_coords}) - actual = array.groupby_bins("lat", bins).apply(lambda x: x.sum()) + actual = array.groupby_bins("lat", bins).map(lambda x: x.sum()) assert_identical(expected, actual) def test_groupby_bins_sort(self): @@ -2784,7 +2794,7 @@ def func(arg1, arg2, arg3=0.0): times = pd.date_range("2000", periods=3, freq="D") da = xr.DataArray([1.0, 1.0, 1.0], coords=[times], dims=["time"]) expected = xr.DataArray([3.0, 3.0, 3.0], coords=[times], dims=["time"]) - actual = da.resample(time="D").apply(func, args=(1.0,), arg3=1.0) + actual = da.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(actual, expected) def test_resample_first(self): @@ -4188,6 +4198,9 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): ) assert_array_equal(actual.values, expected) + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") + # Test center rolling_obj = da.rolling(time=7, center=center) actual = getattr(rolling_obj, name)()["time"] @@ -4203,6 +4216,9 @@ def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window): # dask version rolling_obj = da_dask.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() + if name != "count": + with pytest.warns(DeprecationWarning, match="Reductions will be applied"): + getattr(rolling_obj, name)(dim="time") # numpy version rolling_obj = da_dask.load().rolling( time=window, min_periods=min_periods, center=center diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 2a92d7f7730..2045b90c927 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +from pandas.core.indexes.datetimes import DatetimeIndex import xarray as xr from xarray import ( @@ -22,6 +23,7 @@ open_dataset, set_options, ) +from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like from xarray.core.npcompat import IS_NEP18_ACTIVE @@ -90,6 +92,14 @@ def create_append_test_data(seed=None): string_var = np.array(["ae", "bc", "df"], dtype=object) string_var_to_append = np.array(["asdf", "asdfg"], dtype=object) unicode_var = ["áó", "áó", "áó"] + datetime_var = np.array( + ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[s]" + ) + datetime_var_to_append = np.array( + ["2019-01-04", "2019-01-05"], dtype="datetime64[s]" + ) + bool_var = np.array([True, False, True], dtype=np.bool) + bool_var_to_append = np.array([False, True], dtype=np.bool) ds = xr.Dataset( data_vars={ 
@@ -102,6 +112,8 @@
             "unicode_var": xr.DataArray(
                 unicode_var, coords=[time1], dims=["time"]
             ).astype(np.unicode_),
+            "datetime_var": xr.DataArray(datetime_var, coords=[time1], dims=["time"]),
+            "bool_var": xr.DataArray(bool_var, coords=[time1], dims=["time"]),
         }
     )
 
@@ -118,6 +130,10 @@
             "unicode_var": xr.DataArray(
                 unicode_var[:nt2], coords=[time2], dims=["time"]
             ).astype(np.unicode_),
+            "datetime_var": xr.DataArray(
+                datetime_var_to_append, coords=[time2], dims=["time"]
+            ),
+            "bool_var": xr.DataArray(bool_var_to_append, coords=[time2], dims=["time"]),
         }
     )
 
@@ -2444,6 +2460,53 @@ def test_rename_vars(self):
         with pytest.raises(ValueError):
             original.rename_vars(names_dict_bad)
 
+    @requires_cftime
+    def test_rename_does_not_change_CFTimeIndex_type(self):
+        # make sure CFTimeIndex is not converted to DatetimeIndex #3522
+
+        time = xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap")
+        orig = Dataset(coords={"time": time})
+
+        renamed = orig.rename(time="time_new")
+        assert "time_new" in renamed.indexes
+        assert isinstance(renamed.indexes["time_new"], CFTimeIndex)
+        assert renamed.indexes["time_new"].name == "time_new"
+
+        # check original has not changed
+        assert "time" in orig.indexes
+        assert isinstance(orig.indexes["time"], CFTimeIndex)
+        assert orig.indexes["time"].name == "time"
+
+        # note: rename_dims(time="time_new") drops "ds.indexes"
+        renamed = orig.rename_dims()
+        assert isinstance(renamed.indexes["time"], CFTimeIndex)
+
+        renamed = orig.rename_vars()
+        assert isinstance(renamed.indexes["time"], CFTimeIndex)
+
+    def test_rename_does_not_change_DatetimeIndex_type(self):
+        # make sure DatetimeIndex is conserved on rename
+
+        time = pd.date_range(start="2000", periods=6, freq="2MS")
+        orig = Dataset(coords={"time": time})
+
+        renamed = orig.rename(time="time_new")
+        assert "time_new" in renamed.indexes
+        assert isinstance(renamed.indexes["time_new"], DatetimeIndex)
+        assert renamed.indexes["time_new"].name == "time_new"
+
+        # check original has not changed
+        assert "time" in orig.indexes
+        assert isinstance(orig.indexes["time"], DatetimeIndex)
+        assert orig.indexes["time"].name == "time"
+
+        # note: rename_dims(time="time_new") drops "ds.indexes"
+        renamed = orig.rename_dims()
+        assert isinstance(renamed.indexes["time"], DatetimeIndex)
+
+        renamed = orig.rename_vars()
+        assert isinstance(renamed.indexes["time"], DatetimeIndex)
+
     def test_swap_dims(self):
         original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42})
         expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")})
@@ -2731,6 +2794,23 @@ def test_unstack_errors(self):
         with raises_regex(ValueError, "do not have a MultiIndex"):
             ds.unstack("x")
 
+    def test_unstack_fill_value(self):
+        ds = xr.Dataset(
+            {"var": (("x",), np.arange(6))},
+            coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)},
+        )
+        # make ds incomplete
+        ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"])
+        # test fill_value
+        actual = ds.unstack("index", fill_value=-1)
+        expected = ds.unstack("index").fillna(-1).astype(np.int)
+        assert actual["var"].dtype == np.int
+        assert_equal(actual, expected)
+
+        actual = ds["var"].unstack("index", fill_value=-1)
+        expected = ds["var"].unstack("index").fillna(-1).astype(np.int)
+        assert actual.equals(expected)
+
     def test_stack_unstack_fast(self):
         ds = Dataset(
             {
@@ -3310,17 +3390,17 @@ def identity(x):
             return x
 
         for k in ["x", "c", "y"]:
-            actual = data.groupby(k, squeeze=False).apply(identity)
+ actual = data.groupby(k, squeeze=False).map(identity) assert_equal(data, actual) def test_groupby_returns_new_type(self): data = Dataset({"z": (["x", "y"], np.random.randn(3, 5))}) - actual = data.groupby("x").apply(lambda ds: ds["z"]) + actual = data.groupby("x").map(lambda ds: ds["z"]) expected = data["z"] assert_identical(expected, actual) - actual = data["z"].groupby("x").apply(lambda x: x.to_dataset()) + actual = data["z"].groupby("x").map(lambda x: x.to_dataset()) expected = data assert_identical(expected, actual) @@ -3639,7 +3719,7 @@ def func(arg1, arg2, arg3=0.0): times = pd.date_range("2000", freq="D", periods=3) ds = xr.Dataset({"foo": ("time", [1.0, 1.0, 1.0]), "time": times}) expected = xr.Dataset({"foo": ("time", [3.0, 3.0, 3.0]), "time": times}) - actual = ds.resample(time="D").apply(func, args=(1.0,), arg3=1.0) + actual = ds.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(expected, actual) def test_to_array(self): @@ -4515,31 +4595,36 @@ def test_count(self): actual = ds.count() assert_identical(expected, actual) - def test_apply(self): + def test_map(self): data = create_test_data() data.attrs["foo"] = "bar" - assert_identical(data.apply(np.mean), data.mean()) + assert_identical(data.map(np.mean), data.mean()) expected = data.mean(keep_attrs=True) - actual = data.apply(lambda x: x.mean(keep_attrs=True), keep_attrs=True) + actual = data.map(lambda x: x.mean(keep_attrs=True), keep_attrs=True) assert_identical(expected, actual) - assert_identical( - data.apply(lambda x: x, keep_attrs=True), data.drop_vars("time") - ) + assert_identical(data.map(lambda x: x, keep_attrs=True), data.drop_vars("time")) def scale(x, multiple=1): return multiple * x - actual = data.apply(scale, multiple=2) + actual = data.map(scale, multiple=2) assert_equal(actual["var1"], 2 * data["var1"]) assert_identical(actual["numbers"], data["numbers"]) - actual = data.apply(np.asarray) + actual = data.map(np.asarray) expected = data.drop_vars("time") # time is not used on a data var assert_equal(expected, actual) + def test_apply_pending_deprecated_map(self): + data = create_test_data() + data.attrs["foo"] = "bar" + + with pytest.warns(PendingDeprecationWarning): + assert_identical(data.apply(np.mean), data.mean()) + def make_example_math_dataset(self): variables = { "bar": ("x", np.arange(100, 400, 100)), @@ -4566,15 +4651,15 @@ def test_dataset_number_math(self): def test_unary_ops(self): ds = self.make_example_math_dataset() - assert_identical(ds.apply(abs), abs(ds)) - assert_identical(ds.apply(lambda x: x + 4), ds + 4) + assert_identical(ds.map(abs), abs(ds)) + assert_identical(ds.map(lambda x: x + 4), ds + 4) for func in [ lambda x: x.isnull(), lambda x: x.round(), lambda x: x.astype(int), ]: - assert_identical(ds.apply(func), func(ds)) + assert_identical(ds.map(func), func(ds)) assert_identical(ds.isnull(), ~ds.notnull()) @@ -4587,7 +4672,7 @@ def test_unary_ops(self): def test_dataset_array_math(self): ds = self.make_example_math_dataset() - expected = ds.apply(lambda x: x - ds["foo"]) + expected = ds.map(lambda x: x - ds["foo"]) assert_identical(expected, ds - ds["foo"]) assert_identical(expected, -ds["foo"] + ds) assert_identical(expected, ds - ds["foo"].variable) @@ -4596,7 +4681,7 @@ def test_dataset_array_math(self): actual -= ds["foo"] assert_identical(expected, actual) - expected = ds.apply(lambda x: x + ds["bar"]) + expected = ds.map(lambda x: x + ds["bar"]) assert_identical(expected, ds + ds["bar"]) actual = ds.copy(deep=True) actual += ds["bar"] @@ -4612,7 +4697,7 @@ 
def test_dataset_dataset_math(self): assert_identical(ds, ds + 0 * ds) assert_identical(ds, ds + {"foo": 0, "bar": 0}) - expected = ds.apply(lambda x: 2 * x) + expected = ds.map(lambda x: 2 * x) assert_identical(expected, 2 * ds) assert_identical(expected, ds + ds) assert_identical(expected, ds + ds.data_vars) @@ -4709,7 +4794,7 @@ def test_dataset_transpose(self): assert_identical(expected, actual) actual = ds.transpose("x", "y") - expected = ds.apply(lambda x: x.transpose("x", "y", transpose_coords=True)) + expected = ds.map(lambda x: x.transpose("x", "y", transpose_coords=True)) assert_identical(expected, actual) ds = create_test_data() diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index e2216547ac8..97bd31ae050 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -45,14 +45,14 @@ def test_groupby_dims_property(dataset): assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims -def test_multi_index_groupby_apply(dataset): +def test_multi_index_groupby_map(dataset): # regression test for GH873 ds = dataset.isel(z=1, drop=True)[["foo"]] expected = 2 * ds actual = ( ds.stack(space=["x", "y"]) .groupby("space") - .apply(lambda x: 2 * x) + .map(lambda x: 2 * x) .unstack("space") ) assert_equal(expected, actual) @@ -107,23 +107,23 @@ def test_groupby_input_mutation(): assert_identical(array, array_copy) # should not modify inputs -def test_da_groupby_apply_func_args(): +def test_da_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 array = xr.DataArray([1, 1, 1], [("x", [1, 2, 3])]) expected = xr.DataArray([3, 3, 3], [("x", [1, 2, 3])]) - actual = array.groupby("x").apply(func, args=(1,), arg3=1) + actual = array.groupby("x").map(func, args=(1,), arg3=1) assert_identical(expected, actual) -def test_ds_groupby_apply_func_args(): +def test_ds_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) expected = xr.Dataset({"foo": ("x", [3, 3, 3])}, {"x": [1, 2, 3]}) - actual = dataset.groupby("x").apply(func, args=(1,), arg3=1) + actual = dataset.groupby("x").map(func, args=(1,), arg3=1) assert_identical(expected, actual) @@ -137,42 +137,58 @@ def test_da_groupby_empty(): def test_da_groupby_quantile(): - array = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]) + array = xr.DataArray( + data=[1, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) # Scalar quantile - expected = xr.DataArray([2, 5], [("x", [1, 2])]) + expected = xr.DataArray( + data=[2, 5], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) actual = array.groupby("x").quantile(0.5) assert_identical(expected, actual) # Vector quantile - expected = xr.DataArray([[1, 3], [4, 6]], [("x", [1, 2]), ("quantile", [0, 1])]) + expected = xr.DataArray( + data=[[1, 3], [4, 6]], + coords={"x": [1, 2], "quantile": [0, 1]}, + dims=("x", "quantile"), + ) actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) # Multiple dimensions array = xr.DataArray( - [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], + data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + dims=("x", "y"), ) actual_x = array.groupby("x").quantile(0, dim=...) 
- expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) + expected_x = xr.DataArray( + data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" + ) assert_identical(expected_x, actual_x) actual_y = array.groupby("y").quantile(0, dim=...) - expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) + expected_y = xr.DataArray( + data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" + ) assert_identical(expected_y, actual_y) actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( - [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] + data=[[1, 11, 22], [4, 15, 24]], + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_xx, actual_xx) actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( - [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], + data=[[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_yy, actual_yy) @@ -180,14 +196,14 @@ def test_da_groupby_quantile(): x = [0, 1] foo = xr.DataArray( np.reshape(np.arange(365 * 2), (365, 2)), - coords=dict(time=times, x=x), + coords={"time": times, "x": x}, dims=("time", "x"), ) g = foo.groupby(foo.time.dt.month) actual = g.quantile(0, dim=...) expected = xr.DataArray( - [ + data=[ 0.0, 62.0, 120.0, @@ -201,12 +217,111 @@ def test_da_groupby_quantile(): 610.0, 670.0, ], - [("month", np.arange(1, 13))], + coords={"month": np.arange(1, 13), "quantile": 0}, + dims="month", ) assert_identical(expected, actual) actual = g.quantile(0, dim="time")[:2] - expected = xr.DataArray([[0.0, 1], [62.0, 63]], [("month", [1, 2]), ("x", [0, 1])]) + expected = xr.DataArray( + data=[[0.0, 1], [62.0, 63]], + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + dims=("month", "x"), + ) + assert_identical(expected, actual) + + +def test_ds_groupby_quantile(): + ds = xr.Dataset( + data_vars={"a": ("x", [1, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]} + ) + + # Scalar quantile + expected = xr.Dataset( + data_vars={"a": ("x", [2, 5])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5) + assert_identical(expected, actual) + + # Vector quantile + expected = xr.Dataset( + data_vars={"a": (("x", "quantile"), [[1, 3], [4, 6]])}, + coords={"x": [1, 2], "quantile": [0, 1]}, + ) + actual = ds.groupby("x").quantile([0, 1]) + assert_identical(expected, actual) + + # Multiple dimensions + ds = xr.Dataset( + data_vars={ + "a": ( + ("x", "y"), + [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + ) + }, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + ) + + actual_x = ds.groupby("x").quantile(0, dim=...) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) + assert_identical(expected_x, actual_x) + + actual_y = ds.groupby("y").quantile(0, dim=...) 
+ expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) + assert_identical(expected_y, actual_y) + + actual_xx = ds.groupby("x").quantile(0) + expected_xx = xr.Dataset( + {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + ) + assert_identical(expected_xx, actual_xx) + + actual_yy = ds.groupby("y").quantile(0) + expected_yy = xr.Dataset( + {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + ).transpose() + assert_identical(expected_yy, actual_yy) + + times = pd.date_range("2000-01-01", periods=365) + x = [0, 1] + foo = xr.Dataset( + {"a": (("time", "x"), np.reshape(np.arange(365 * 2), (365, 2)))}, + coords=dict(time=times, x=x), + ) + g = foo.groupby(foo.time.dt.month) + + actual = g.quantile(0, dim=...) + expected = xr.Dataset( + { + "a": ( + "month", + [ + 0.0, + 62.0, + 120.0, + 182.0, + 242.0, + 304.0, + 364.0, + 426.0, + 488.0, + 548.0, + 610.0, + 670.0, + ], + ) + }, + coords={"month": np.arange(1, 13), "quantile": 0}, + ) + assert_identical(expected, actual) + + actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) + expected = xr.Dataset( + data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + ) assert_identical(expected, actual) @@ -285,7 +400,7 @@ def test_groupby_drops_nans(): expected.variable.values[0, 0, :] = np.nan expected.variable.values[-1, -1, :] = np.nan expected.variable.values[3, 0, :] = np.nan - actual = grouped.apply(lambda x: x).transpose(*ds.variable.dims) + actual = grouped.map(lambda x: x).transpose(*ds.variable.dims) assert_identical(actual, expected) # reduction along grouped dimension diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index cfce5d6f645..0b410383a34 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -5,7 +5,13 @@ import pytest import xarray as xr -from xarray.core.missing import NumpyInterpolator, ScipyInterpolator, SplineInterpolator +from xarray.core.missing import ( + NumpyInterpolator, + ScipyInterpolator, + SplineInterpolator, + get_clean_interp_index, + _get_nan_block_lengths, +) from xarray.core.pycompat import dask_array_type from xarray.tests import ( assert_array_equal, @@ -153,7 +159,7 @@ def test_interpolate_pd_compat_polynomial(): def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) expected = xr.DataArray(vals, dims="x", coords={"x": [2, 1, 3]}) - with raises_regex(ValueError, "Index must be monotonicly increasing"): + with raises_regex(ValueError, "Index 'x' must be monotonically increasing"): expected.interpolate_na(dim="x", method="index") @@ -169,12 +175,19 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim="x", method="foo") +def test_interpolate_duplicate_values_raises(): + data = np.random.randn(2, 3) + da = xr.DataArray(data, coords=[("x", ["a", "a"]), ("y", [0, 1, 2])]) + with raises_regex(ValueError, "Index 'x' has duplicate values"): + da.interpolate_na(dim="x", method="foo") + + def test_interpolate_multiindex_raises(): data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, coords=[("x", ["a", "b"]), ("y", [0, 1, 2])]) das = da.stack(z=("x", "y")) - with raises_regex(TypeError, "Index must be castable to float64"): + with raises_regex(TypeError, "Index 'z' must be castable to float64"): das.interpolate_na(dim="z") @@ -439,3 +452,114 @@ def 
test_ffill_dataset(ds): @requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim="time") + + +@requires_bottleneck +@pytest.mark.parametrize( + "y, lengths", + [ + [np.arange(9), [[3, 3, 3, 0, 3, 3, 0, 2, 2]]], + [np.arange(9) * 3, [[9, 9, 9, 0, 9, 9, 0, 6, 6]]], + [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 4, 4, 0, 4, 4]]], + ], +) +def test_interpolate_na_nan_block_lengths(y, lengths): + arr = [[np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan]] + da = xr.DataArray(arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_nan_block_lengths(da, dim="y", index=index) + expected = da.copy(data=lengths * 2) + assert_equal(actual, expected) + + +@pytest.fixture +def da_time(): + return xr.DataArray( + [np.nan, 1, 2, np.nan, np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], + dims=["t"], + ) + + +def test_interpolate_na_max_gap_errors(da_time): + with raises_regex( + NotImplementedError, "max_gap not implemented for unlabeled coordinates" + ): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(ValueError, "max_gap must be a scalar."): + da_time.interpolate_na("t", max_gap=(1,)) + + da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) + with raises_regex(TypeError, "Underlying index is"): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(TypeError, "Expected integer or floating point"): + da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) + + with raises_regex(ValueError, "Could not convert 'huh' to timedelta64"): + da_time.interpolate_na("t", max_gap="huh") + + +@requires_bottleneck +@pytest.mark.parametrize( + "time_range_func", + [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], +) +@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) +@pytest.mark.parametrize( + "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] +) +def test_interpolate_na_max_gap_time_specifier( + da_time, max_gap, transform, time_range_func +): + da_time["t"] = time_range_func("2001-01-01", freq="H", periods=11) + expected = transform( + da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) + ) + actual = transform(da_time).interpolate_na("t", max_gap=max_gap) + assert_equal(actual, expected) + + +@requires_bottleneck +@pytest.mark.parametrize( + "coords", + [ + pytest.param(None, marks=pytest.mark.xfail()), + {"x": np.arange(4), "y": np.arange(11)}, + ], +) +def test_interpolate_na_2d(coords): + da = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + + actual = da.interpolate_na("y", max_gap=2) + expected_y = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + ] + ) + assert_equal(actual, expected_y) + + actual = da.interpolate_na("x", max_gap=3) + expected_x = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + 
coords=coords, + ) + assert_equal(actual, expected_x) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 8e2d4b8e064..a02fef2faeb 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -339,7 +339,7 @@ def test_dataarray_property(prop): (do("copy"), True), (do("count"), False), (do("diff", "x"), True), - (do("drop", "x"), True), + (do("drop_vars", "x"), True), (do("expand_dims", {"z": 2}, axis=2), True), (do("get_axis_num", "x"), False), (do("get_index", "x"), False), @@ -856,6 +856,10 @@ def test_dask_token(): import dask s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) + + # https://github.com/pydata/sparse/issues/300 + s.__dask_tokenize__ = lambda: dask.base.normalize_token(s.__dict__) + a = DataArray(s) t1 = dask.base.tokenize(a) t2 = dask.base.tokenize(a) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 80063f8b4bc..0be6f8af464 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -123,14 +123,19 @@ def extract_units(obj): def strip_units(obj): if isinstance(obj, xr.Dataset): - data_vars = {name: strip_units(value) for name, value in obj.data_vars.items()} - coords = {name: strip_units(value) for name, value in obj.coords.items()} + data_vars = { + strip_units(name): strip_units(value) + for name, value in obj.data_vars.items() + } + coords = { + strip_units(name): strip_units(value) for name, value in obj.coords.items() + } new_obj = xr.Dataset(data_vars=data_vars, coords=coords) elif isinstance(obj, xr.DataArray): data = array_strip_units(obj.data) coords = { - name: ( + strip_units(name): ( (value.dims, array_strip_units(value.data)) if isinstance(value.data, Quantity) else value # to preserve multiindexes @@ -138,9 +143,13 @@ def strip_units(obj): for name, value in obj.coords.items() } - new_obj = xr.DataArray(name=obj.name, data=data, coords=coords, dims=obj.dims) - elif hasattr(obj, "magnitude"): + new_obj = xr.DataArray( + name=strip_units(obj.name), data=data, coords=coords, dims=obj.dims + ) + elif isinstance(obj, unit_registry.Quantity): new_obj = obj.magnitude + elif isinstance(obj, (list, tuple)): + return type(obj)(strip_units(elem) for elem in obj) else: new_obj = obj @@ -191,6 +200,40 @@ def attach_units(obj, units): return new_obj +def convert_units(obj, to): + if isinstance(obj, xr.Dataset): + data_vars = { + name: convert_units(array, to) for name, array in obj.data_vars.items() + } + coords = {name: convert_units(array, to) for name, array in obj.coords.items()} + + new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs) + elif isinstance(obj, xr.DataArray): + name = obj.name + + new_units = ( + to.get(name, None) or to.get("data", None) or to.get(None, None) or 1 + ) + data = convert_units(obj.data, {None: new_units}) + + coords = { + name: (array.dims, convert_units(array.data, to)) + for name, array in obj.coords.items() + if name != obj.name + } + + new_obj = xr.DataArray( + name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims + ) + elif isinstance(obj, unit_registry.Quantity): + units = to.get(None) + new_obj = obj.to(units) if units is not None else obj + else: + new_obj = obj + + return new_obj + + def assert_equal_with_units(a, b): # works like xr.testing.assert_equal, but also explicitly checks units # so, it is more like assert_identical @@ -266,31 +309,48 @@ def __repr__(self): class function: - def __init__(self, name): - self.name = name - self.func = getattr(np, name) + def __init__(self, name_or_function, *args, 
**kwargs): + if callable(name_or_function): + self.name = name_or_function.__name__ + self.func = name_or_function + else: + self.name = name_or_function + self.func = getattr(np, name_or_function) + if self.func is None: + raise AttributeError( + f"module 'numpy' has no attribute named '{self.name}'" + ) + + self.args = args + self.kwargs = kwargs def __call__(self, *args, **kwargs): - return self.func(*args, **kwargs) + all_args = list(self.args) + list(args) + all_kwargs = {**self.kwargs, **kwargs} + + return self.func(*all_args, **all_kwargs) def __repr__(self): return f"function_{self.name}" -@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) -def test_replication(func, dtype): - array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s - data_array = xr.DataArray(data=array, dims="x") +def test_apply_ufunc_dataarray(dtype): + func = function( + xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} + ) - numpy_func = getattr(np, func.__name__) - expected = xr.DataArray(data=numpy_func(array), dims="x") + array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(20) * unit_registry.s + data_array = xr.DataArray(data=array, dims="x", coords={"x": x}) + + expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) result = func(data_array) assert_equal_with_units(expected, result) @pytest.mark.xfail( - reason="np.full_like on Variable strips the unit and pint does not allow mixed args" + reason="pint does not implement `np.result_type` and align strips units" ) @pytest.mark.parametrize( "unit,error", @@ -299,342 +359,2496 @@ def test_replication(func, dtype): pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), - pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.ms, None, id="compatible_unit"), - pytest.param(unit_registry.s, None, id="identical_unit"), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), + ids=repr, ) -def test_replication_full_like(unit, error, dtype): - array = np.linspace(0, 5, 10) * unit_registry.s - data_array = xr.DataArray(data=array, dims="x") +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataarray(fill_value, variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + data_array1 = xr.DataArray( + data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") + ) + data_array2 = xr.DataArray( + data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") + ) - fill_value = -1 * unit + fill_value = fill_value * data_unit + func = function(xr.align, 
join="outer", fill_value=fill_value) if error is not None: with pytest.raises(error): - xr.full_like(data_array, fill_value=fill_value) - else: - result = xr.full_like(data_array, fill_value=fill_value) - expected = np.full_like(array, fill_value=fill_value) + func(data_array1, data_array2) - assert_equal_with_units(expected, result) + return + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(data_array1) + # FIXME: should the expected_b have the same units as data_array1 + # or data_array2? + expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(data_array1), + strip_units(convert_units(data_array2, units)), + **stripped_kwargs, + ) + ) + result_a, result_b = func(data_array1, data_array2) -class TestDataArray: - @pytest.mark.filterwarnings("error:::pint[.*]") - @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "with_dims", - marks=pytest.mark.xfail(reason="units in indexes are not supported"), - ), - pytest.param("with_coords"), - pytest.param("without_coords"), + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataset(fill_value, unit, variant, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + ds1 = xr.Dataset( + data_vars={"a": (("x", "y"), array1)}, + coords={"x": x, "x_a": ("x", x_a1), "y": y1}, + ) + ds2 = xr.Dataset( + data_vars={"a": (("x", "y"), array2)}, + coords={"x": x, "x_a": ("x", x_a2), "y": y2}, ) - def test_init(self, variant, dtype): - array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m - - x = np.arange(len(array)) * unit_registry.s - y = x.to(unit_registry.ms) - variants = { - "with_dims": {"x": x}, - "with_coords": {"y": ("x", y)}, - "without_coords": {}, - } + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(ds1, ds2) - kwargs = {"data": array, "dims": "x", "coords": variants.get(variant)} - data_array = xr.DataArray(**kwargs) + return - assert isinstance(data_array.data, 
Quantity) - assert all( - { - name: isinstance(coord.data, Quantity) - for name, coord in data_array.coords.items() - }.values() + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(ds1) + # FIXME: should the expected_b have the same units as ds1 or ds2? + expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(ds1), strip_units(convert_units(ds2, units)), **stripped_kwargs ) - - @pytest.mark.filterwarnings("error:::pint[.*]") - @pytest.mark.parametrize( - "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) - ) - @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "with_dims", - marks=pytest.mark.xfail(reason="units in indexes are not supported"), - ), - pytest.param("with_coords"), - pytest.param("without_coords"), - ), ) - def test_repr(self, func, variant, dtype): - array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m - x = np.arange(len(array)) * unit_registry.s - y = x.to(unit_registry.ms) + result_a, result_b = func(ds1, ds2) - variants = { - "with_dims": {"x": x}, - "with_coords": {"y": ("x", y)}, - "without_coords": {}, - } + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) - kwargs = {"data": array, "dims": "x", "coords": variants.get(variant)} - data_array = xr.DataArray(**kwargs) - # FIXME: this just checks that the repr does not raise - # warnings or errors, but does not check the result - func(data_array) +def test_broadcast_dataarray(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa - @pytest.mark.parametrize( - "func", - ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("argmax"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), - pytest.param( - function("argmin"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), - function("max"), - function("mean"), - pytest.param( - function("median"), - marks=pytest.mark.xfail( - reason="np.median on DataArray strips the units" - ), - ), - function("min"), - pytest.param( - function("prod"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("sum"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), - function("std"), - function("var"), - function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("argmax"), - marks=pytest.mark.xfail( - reason="comparison of quantities with ndarrays in nanops not implemented" - ), - ), - pytest.param( - method("argmin"), - marks=pytest.mark.xfail( - reason="comparison of quantities with ndarrays in nanops not implemented" - ), - ), - method("max"), - method("mean"), - method("median"), - method("min"), - pytest.param( - method("prod"), - 
marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), - pytest.param( - method("sum"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), - method("std"), - method("var"), - method("cumsum"), - pytest.param( - method("cumprod"), - marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"), - ), - ), - ids=repr, + a = xr.DataArray(data=array1, dims="x") + b = xr.DataArray(data=array2, dims="y") + + expected_a, expected_b = tuple( + attach_units(elem, extract_units(a)) + for elem in xr.broadcast(strip_units(a), strip_units(b)) ) - def test_aggregation(self, func, dtype): - array = np.arange(10).astype(dtype) * unit_registry.m - data_array = xr.DataArray(data=array) + result_a, result_b = xr.broadcast(a, b) - expected = xr.DataArray(data=func(array)) - result = func(data_array) + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) - assert_equal_with_units(expected, result) - @pytest.mark.parametrize( - "func", - ( - pytest.param(operator.neg, id="negate"), - pytest.param(abs, id="absolute"), - pytest.param( - np.round, - id="round", - marks=pytest.mark.xfail(reason="pint does not implement round"), - ), - ), +def test_broadcast_dataset(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + + (expected,) = tuple( + attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) ) - def test_unary_operations(self, func, dtype): + (result,) = xr.broadcast(ds) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`combine_by_coords` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_by_coords(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + other_array1 = np.ones_like(array1) * data_unit + other_array2 = np.ones_like(array2) * data_unit + other_x = np.arange(1, 4) * 10 * dim_unit + other_y = np.arange(2, 4) * dim_unit + other_z = np.arange(3, 6) * coord_unit + + ds = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + other = xr.Dataset( + data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)}, + coords={"x": other_x, "y": other_y, "z": ("x", other_z)}, + ) + + if error is not None: + with pytest.raises(error): + xr.combine_by_coords([ds, other]) + + 
return + + units = extract_units(ds) + expected = attach_units( + xr.combine_by_coords( + [strip_units(ds), strip_units(convert_units(other, units))] + ), + units, + ) + result = xr.combine_by_coords([ds, other]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_nested(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + ds4 = xr.Dataset( + data_vars={ + "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit), + "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(6, 9) * dim_unit, + "y": np.arange(6, 8) * dim_unit, + "z": ("x", np.arange(6, 9) * coord_unit), + }, + ) + + func = function(xr.combine_nested, concat_dim=["x", "y"]) + if error is not None: + with pytest.raises(error): + func([[ds1, ds2], [ds3, ds4]]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func( + [ + [strip_units(ds1), convert_and_strip(ds2)], + [convert_and_strip(ds3), convert_and_strip(ds4)], + ] + ), + units, + ) + result = func([[ds1, ds2], [ds3, ds4]]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip 
units")), + ), +) +def test_concat_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + arr1 = xr.DataArray(data=array1, coords={"x": x1}, dims="x") + arr2 = xr.DataArray(data=array2, coords={"x": x2}, dims="x") + + if error is not None: + with pytest.raises(error): + xr.concat([arr1, arr2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(arr1), strip_units(arr2)], dim="x"), extract_units(arr1) + ) + result = xr.concat([arr1, arr2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1}) + ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2}) + + if error is not None: + with pytest.raises(error): + xr.concat([ds1, ds2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(ds1), strip_units(ds2)], dim="x"), extract_units(ds1) + ) + result = xr.concat([ds1, ds2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + y = np.arange(3) * 
original_unit + z = np.arange(4) * original_unit + u = np.linspace(10, 20, 2) * original_unit + v = np.linspace(10, 20, 3) * original_unit + w = np.linspace(10, 20, 4) * original_unit + + arr1 = xr.DataArray( + name="a", + data=array1, + coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + dims=("x", "y"), + ) + arr2 = xr.DataArray( + name="b", + data=array2, + coords={ + "x": np.arange(2, 4) * dim_unit, + "z": z, + "u": ("x", np.linspace(20, 30, 2) * coord_unit), + "w": ("z", w), + }, + dims=("x", "z"), + ) + arr3 = xr.DataArray( + name="c", + data=array3, + coords={ + "y": np.arange(3, 6) * dim_unit, + "z": np.arange(4, 8) * dim_unit, + "v": ("y", np.linspace(10, 20, 3) * coord_unit), + "w": ("z", np.linspace(10, 20, 4) * coord_unit), + }, + dims=("y", "z"), + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([arr1, arr2, arr3]) + + return + + units = {name: original_unit for name in list("abcuvwxyz")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) + expected = attach_units( + func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), + units, + ) + result = func([arr1, arr2, arr3]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(11, 14) * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([ds1, ds2, ds3]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + ) + result = func([ds1, ds2, ds3]) + + assert_equal_with_units(expected, result) + + 
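All of the combine/concat/merge tests above build their `expected` value with the same round-trip: strip the pint units, run the xarray operation on plain ndarrays, then re-attach the units and compare against the unit-aware call. A minimal, self-contained sketch of that idea follows; `toy_strip` and `toy_attach` are illustrative stand-ins for the module-level `strip_units`/`attach_units` helpers defined earlier in this file, not the real implementations.

import numpy as np
import pint
import xarray as xr

unit_registry = pint.UnitRegistry()

def toy_strip(da):
    # stand-in for strip_units: drop the pint wrapper, keep the magnitudes
    return xr.DataArray(da.data.magnitude, dims=da.dims)

def toy_attach(da, unit):
    # stand-in for attach_units: re-wrap the magnitudes in a quantity
    return xr.DataArray(da.data * unit, dims=da.dims)

arr1 = xr.DataArray(np.arange(3, dtype=float) * unit_registry.m, dims="x")
arr2 = xr.DataArray(np.arange(3, 6, dtype=float) * unit_registry.m, dims="x")

# expected: concatenate the bare magnitudes, then re-attach the unit ...
expected = toy_attach(
    xr.concat([toy_strip(arr1), toy_strip(arr2)], dim="x"), unit_registry.m
)
# ... and compare with the unit-aware call, which the xfail marks above
# document as currently stripping the units
result = xr.concat([arr1, arr2], dim="x")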
+@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) +def test_replication_dataarray(func, dtype): + array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + data_array = xr.DataArray(data=array, dims="x") + + numpy_func = getattr(np, func.__name__) + expected = xr.DataArray(data=numpy_func(array), dims="x") + result = func(data_array) + + assert_equal_with_units(expected, result) + + +@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) +def test_replication_dataset(func, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + numpy_func = getattr(np, func.__name__) + expected = ds.copy( + data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} + ) + result = func(ds) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail( + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.ms, None, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="identical_unit"), + ), + ids=repr, +) +def test_replication_full_like_dataarray(unit, error, dtype): + array = np.linspace(0, 5, 10) * unit_registry.s + data_array = xr.DataArray(data=array, dims="x") + + fill_value = -1 * unit + if error is not None: + with pytest.raises(error): + xr.full_like(data_array, fill_value=fill_value) + else: + result = xr.full_like(data_array, fill_value=fill_value) + expected = np.full_like(array, fill_value=fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail( + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.ms, None, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="identical_unit"), + ), + ids=repr, +) +def test_replication_full_like_dataset(unit, error, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + fill_value = -1 * unit + if error is not None: + with pytest.raises(error): + xr.full_like(ds, fill_value=fill_value) + + return + + expected = ds.copy( + data={ + name: np.full_like(array, fill_value=fill_value) + for name, array in ds.data_vars.items() + } + ) + result = xr.full_like(ds, fill_value=fill_value) + + assert_equal_with_units(expected, result) + + 
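The `where` tests below normalise a compatible fill value to the data's unit before computing the stripped expectation, via pint's `Quantity.check()` and `Quantity.to()`. Shown standalone as a sketch (`ureg` here is a fresh registry, not the module's `unit_registry`):

import pint

ureg = pint.UnitRegistry()
fill = 10.2 * ureg.mm

# Quantity.check() tests dimensional compatibility without converting
assert fill.check(ureg.m)       # mm and m share the length dimension
assert not fill.check(ureg.s)   # length vs. time: incompatible

# Quantity.to() then performs the actual conversion
fill_in_m = fill.to(ureg.m)     # 0.0102 metre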
+@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataarray(fill_value, unit, error, dtype): + array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + + x = xr.DataArray(data=array, dims="x") + cond = x < 5 * unit_registry.m + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + if error is not None: + with pytest.raises(error): + xr.where(cond, x, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(x), strip_units(fill_value_)), extract_units(x) + ) + result = xr.where(cond, x, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataset(fill_value, unit, error, dtype): + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m + x = np.arange(10) * unit_registry.s + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + cond = ds.x < 5 * unit_registry.s + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + if error is not None: + with pytest.raises(error): + xr.where(cond, ds, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(ds), strip_units(fill_value_)), extract_units(ds) + ) + result = xr.where(cond, ds, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="pint does not implement `np.einsum`") +def test_dot_dataarray(dtype): + array1 = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) + * unit_registry.m + / unit_registry.s + ) + array2 = ( + np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s + ) + + arr1 = xr.DataArray(data=array1, dims=("x", "y")) + arr2 = xr.DataArray(data=array2, dims=("y", "z")) + + expected = array1.dot(array2) + result = xr.dot(arr1, arr2) + + assert_equal_with_units(expected, result) + + +class TestDataArray: + @pytest.mark.filterwarnings("error:::pint[.*]") + @pytest.mark.parametrize( + "variant", + ( + pytest.param( + "with_dims", + marks=pytest.mark.xfail(reason="units in indexes are not supported"), + ), + pytest.param("with_coords"), + pytest.param("without_coords"), + ), + ) + def 
test_init(self, variant, dtype): + array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m + + x = np.arange(len(array)) * unit_registry.s + y = x.to(unit_registry.ms) + + variants = { + "with_dims": {"x": x}, + "with_coords": {"y": ("x", y)}, + "without_coords": {}, + } + + kwargs = {"data": array, "dims": "x", "coords": variants.get(variant)} + data_array = xr.DataArray(**kwargs) + + assert isinstance(data_array.data, Quantity) + assert all( + { + name: isinstance(coord.data, Quantity) + for name, coord in data_array.coords.items() + }.values() + ) + + @pytest.mark.filterwarnings("error:::pint[.*]") + @pytest.mark.parametrize( + "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) + ) + @pytest.mark.parametrize( + "variant", + ( + pytest.param( + "with_dims", + marks=pytest.mark.xfail(reason="units in indexes are not supported"), + ), + pytest.param("with_coords"), + pytest.param("without_coords"), + ), + ) + def test_repr(self, func, variant, dtype): + array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m + x = np.arange(len(array)) * unit_registry.s + y = x.to(unit_registry.ms) + + variants = { + "with_dims": {"x": x}, + "with_coords": {"y": ("x", y)}, + "without_coords": {}, + } + + kwargs = {"data": array, "dims": "x", "coords": variants.get(variant)} + data_array = xr.DataArray(**kwargs) + + # FIXME: this just checks that the repr does not raise + # warnings or errors, but does not check the result + func(data_array) + + @pytest.mark.parametrize( + "func", + ( + pytest.param( + function("all"), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), + ), + pytest.param( + function("any"), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), + ), + pytest.param( + function("argmax"), + marks=pytest.mark.xfail( + reason="comparison of quantity with ndarrays in nanops not implemented" + ), + ), + pytest.param( + function("argmin"), + marks=pytest.mark.xfail( + reason="comparison of quantity with ndarrays in nanops not implemented" + ), + ), + function("max"), + function("mean"), + pytest.param( + function("median"), + marks=pytest.mark.xfail( + reason="np.median on DataArray strips the units" + ), + ), + function("min"), + pytest.param( + function("prod"), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), + ), + pytest.param( + function("sum"), + marks=pytest.mark.xfail( + reason="comparison of quantity with ndarrays in nanops not implemented" + ), + ), + function("std"), + function("var"), + function("cumsum"), + pytest.param( + function("cumprod"), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), + ), + pytest.param( + method("all"), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), + ), + pytest.param( + method("any"), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), + ), + pytest.param( + method("argmax"), + marks=pytest.mark.xfail( + reason="comparison of quantities with ndarrays in nanops not implemented" + ), + ), + pytest.param( + method("argmin"), + marks=pytest.mark.xfail( + reason="comparison of quantities with ndarrays in nanops not implemented" + ), + ), + method("max"), + method("mean"), + method("median"), + method("min"), + pytest.param( + method("prod"), + marks=pytest.mark.xfail( + reason="comparison of quantity with ndarrays in nanops not implemented" + ), + ), + pytest.param( + method("sum"), + marks=pytest.mark.xfail( + reason="comparison of quantity with ndarrays in nanops not implemented" + ), + ), + method("std"), + method("var"), + method("cumsum"), + 
pytest.param(
+                method("cumprod"),
+                marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"),
+            ),
+        ),
+        ids=repr,
+    )
+    def test_aggregation(self, func, dtype):
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array)
+
+        expected = xr.DataArray(data=func(array))
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(operator.neg, id="negate"),
+            pytest.param(abs, id="absolute"),
+            pytest.param(
+                np.round,
+                id="round",
+                marks=pytest.mark.xfail(reason="pint does not implement round"),
+            ),
+        ),
+    )
+    def test_unary_operations(self, func, dtype):
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array)
+
+        expected = xr.DataArray(data=func(array))
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(lambda x: 2 * x, id="multiply"),
+            pytest.param(lambda x: x + x, id="add"),
+            pytest.param(lambda x: x[0] + x, id="add scalar"),
+            pytest.param(
+                lambda x: x.T @ x,
+                id="matrix multiply",
+                marks=pytest.mark.xfail(
+                    reason="pint does not support matrix multiplication yet"
+                ),
+            ),
+        ),
+    )
+    def test_binary_operations(self, func, dtype):
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array)
+
+        expected = xr.DataArray(data=func(array))
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "comparison",
+        (
+            pytest.param(operator.lt, id="less_than"),
+            pytest.param(operator.ge, id="greater_equal"),
+            pytest.param(operator.eq, id="equal"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, ValueError, id="without_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incorrect_unit"),
+            pytest.param(unit_registry.m, None, id="correct_unit"),
+        ),
+    )
+    def test_comparison_operations(self, comparison, unit, error, dtype):
+        array = (
+            np.array([10.1, 5.2, 6.5, 8.0, 21.3, 7.1, 1.3]).astype(dtype)
+            * unit_registry.m
+        )
+        data_array = xr.DataArray(data=array)
+
+        value = 8
+        to_compare_with = value * unit
+
+        # incompatible units are all not equal
+        if error is not None and comparison is not operator.eq:
+            with pytest.raises(error):
+                comparison(array, to_compare_with)
+
+            with pytest.raises(error):
+                comparison(data_array, to_compare_with)
+        else:
+            result = comparison(data_array, to_compare_with)
+            # pint compares incompatible arrays to False, so we extend the
+            # result to the array's shape; the multiplication by ones_like
+            # works for both scalar and array results
+            expected = xr.DataArray(
+                data=comparison(array, to_compare_with)
+                * np.ones_like(array, dtype=bool)
+            )
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "units,error",
+        (
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.m, DimensionalityError, id="incorrect unit"),
+            pytest.param(unit_registry.degree, None, id="correct unit"),
+        ),
+    )
+    def test_univariate_ufunc(self, units, error, dtype):
+        array = np.arange(10).astype(dtype) * units
+        data_array = xr.DataArray(data=array)
+
+        if error is not None:
+            with pytest.raises(error):
+                np.sin(data_array)
+        else:
+            expected = xr.DataArray(data=np.sin(array))
+            result = np.sin(data_array)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="pint's
implementation of `np.maximum` strips units") + def test_bivariate_ufunc(self, dtype): + unit = unit_registry.m + array = np.arange(10).astype(dtype) * unit + data_array = xr.DataArray(data=array) + + expected = xr.DataArray(np.maximum(array, 0 * unit)) + + assert_equal_with_units(expected, np.maximum(data_array, 0 * unit)) + assert_equal_with_units(expected, np.maximum(0 * unit, data_array)) + + @pytest.mark.parametrize("property", ("T", "imag", "real")) + def test_numpy_properties(self, property, dtype): + array = ( + np.arange(5 * 10).astype(dtype) + + 1j * np.linspace(-1, 0, 5 * 10).astype(dtype) + ).reshape(5, 10) * unit_registry.s + data_array = xr.DataArray(data=array, dims=("x", "y")) + + expected = xr.DataArray( + data=getattr(array, property), + dims=("x", "y")[:: 1 if property != "T" else -1], + ) + result = getattr(data_array, property) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + method("conj"), + method("argsort"), + method("conjugate"), + method("round"), + pytest.param( + method("rank", dim="x"), + marks=pytest.mark.xfail(reason="pint does not implement rank yet"), + ), + ), + ids=repr, + ) + def test_numpy_methods(self, func, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array, dims="x") + + expected = xr.DataArray(func(array), dims="x") + result = func(data_array) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", (method("clip", min=3, max=8), method("searchsorted", v=5)), ids=repr + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_numpy_methods_with_args(self, func, unit, error, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) - expected = xr.DataArray(data=func(array)) + scalar_types = (int, float) + kwargs = { + key: (value * unit if isinstance(value, scalar_types) else value) + for key, value in func.kwargs.items() + } + + if error is not None: + with pytest.raises(error): + func(data_array, **kwargs) + else: + expected = func(array, **kwargs) + if func.name not in ["searchsorted"]: + expected = xr.DataArray(data=expected) + result = func(data_array, **kwargs) + + if func.name in ["searchsorted"]: + assert np.allclose(expected, result) + else: + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", (method("isnull"), method("notnull"), method("count")), ids=repr + ) + def test_missing_value_detection(self, func, dtype): + array = ( + np.array( + [ + [1.4, 2.3, np.nan, 7.2], + [np.nan, 9.7, np.nan, np.nan], + [2.1, np.nan, np.nan, 4.6], + [9.9, np.nan, 7.2, 9.1], + ] + ) + * unit_registry.degK + ) + x = np.arange(array.shape[0]) * unit_registry.m + y = np.arange(array.shape[1]) * unit_registry.m + + data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + + expected = func(strip_units(data_array)) + result = func(data_array) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="ffill and bfill lose units in data") + @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) + def test_missing_value_filling(self, func, dtype): + array 
= (
+            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
+            * unit_registry.degK
+        )
+        x = np.arange(len(array))
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+
+        expected = attach_units(
+            func(strip_units(data_array), dim="x"), {"data": unit_registry.degK}
+        )
+        result = func(data_array, dim="x")
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="fillna drops the unit")
+    @pytest.mark.parametrize(
+        "fill_value",
+        (
+            pytest.param(
+                -1,
+                id="python scalar",
+                marks=pytest.mark.xfail(
+                    reason="python scalar cannot be converted using astype()"
+                ),
+            ),
+            pytest.param(np.array(-1), id="numpy scalar"),
+            pytest.param(np.array([-1]), id="numpy array"),
+        ),
+    )
+    def test_fillna(self, fill_value, dtype):
+        unit = unit_registry.m
+        array = np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * unit
+        data_array = xr.DataArray(data=array)
+
+        expected = attach_units(
+            strip_units(data_array).fillna(value=fill_value), {"data": unit}
+        )
+        result = data_array.fillna(value=fill_value * unit)
+
+        assert_equal_with_units(expected, result)
+
+    def test_dropna(self, dtype):
+        array = (
+            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
+            * unit_registry.m
+        )
+        x = np.arange(len(array))
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+
+        expected = attach_units(
+            strip_units(data_array).dropna(dim="x"), {"data": unit_registry.m}
+        )
+        result = data_array.dropna(dim="x")
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="pint does not implement `numpy.isin`")
+    @pytest.mark.parametrize(
+        "unit",
+        (
+            pytest.param(1, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, id="dimensionless"),
+            pytest.param(unit_registry.s, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, id="compatible_unit"),
+            pytest.param(unit_registry.m, id="same_unit"),
+        ),
+    )
+    def test_isin(self, unit, dtype):
+        array = (
+            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
+            * unit_registry.m
+        )
+        data_array = xr.DataArray(data=array, dims="x")
+
+        raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype)
+        values = raw_values * unit
+
+        result_without_units = strip_units(data_array).isin(raw_values)
+        if unit != unit_registry.m:
+            result_without_units[:] = False
+        result_with_units = data_array.isin(values)
+
+        assert_equal_with_units(result_without_units, result_with_units)
+
+    @pytest.mark.parametrize(
+        "variant",
+        (
+            pytest.param(
+                "masking",
+                marks=pytest.mark.xfail(reason="nan not compatible with quantity"),
+            ),
+            pytest.param(
+                "replacing_scalar",
+                marks=pytest.mark.xfail(reason="scalar not convertible using astype"),
+            ),
+            pytest.param(
+                "replacing_array",
+                marks=pytest.mark.xfail(
+                    reason="replacing using an array drops the units"
+                ),
+            ),
+            pytest.param(
+                "dropping",
+                marks=pytest.mark.xfail(reason="nan not compatible with quantity"),
+            ),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="same_unit"),
+        ),
+    )
+    def
test_where(self, variant, unit, error, dtype):
+        def _strip_units(mapping):
+            return {key: array_strip_units(value) for key, value in mapping.items()}
+
+        original_unit = unit_registry.m
+        array = np.linspace(0, 1, 10).astype(dtype) * original_unit
+
+        data_array = xr.DataArray(data=array)
+
+        condition = data_array < 0.5 * original_unit
+        other = np.linspace(-2, -1, 10).astype(dtype) * unit
+        variant_kwargs = {
+            "masking": {"cond": condition},
+            "replacing_scalar": {"cond": condition, "other": -1 * unit},
+            "replacing_array": {"cond": condition, "other": other},
+            "dropping": {"cond": condition, "drop": True},
+        }
+        kwargs = variant_kwargs.get(variant)
+        kwargs_without_units = _strip_units(kwargs)
+
+        if variant not in ("masking", "dropping") and error is not None:
+            with pytest.raises(error):
+                data_array.where(**kwargs)
+        else:
+            expected = attach_units(
+                strip_units(data_array).where(**kwargs_without_units),
+                {"data": original_unit},
+            )
+            result = data_array.where(**kwargs)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="interpolate strips units")
+    def test_interpolate_na(self, dtype):
+        array = (
+            np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1])
+            * unit_registry.m
+        )
+        x = np.arange(len(array))
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype)
+
+        expected = attach_units(
+            strip_units(data_array).interpolate_na(dim="x"), {"data": unit_registry.m}
+        )
+        result = data_array.interpolate_na(dim="x")
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="uses DataArray.where, which currently fails")
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_combine_first(self, unit, error, dtype):
+        array = np.zeros(shape=(2, 2), dtype=dtype) * unit_registry.m
+        other_array = np.ones_like(array) * unit
+
+        data_array = xr.DataArray(
+            data=array, coords={"x": ["a", "b"], "y": [-1, 0]}, dims=["x", "y"]
+        )
+        other = xr.DataArray(
+            data=other_array, coords={"x": ["b", "c"], "y": [0, 1]}, dims=["x", "y"]
+        )
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.combine_first(other)
+        else:
+            expected = attach_units(
+                strip_units(data_array).combine_first(strip_units(other)),
+                {"data": unit_registry.m},
+            )
+            result = data_array.combine_first(other)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "unit",
+        (
+            pytest.param(1, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, id="dimensionless"),
+            pytest.param(unit_registry.s, id="incompatible_unit"),
+            pytest.param(
+                unit_registry.cm,
+                id="compatible_unit",
+                marks=pytest.mark.xfail(reason="identical does not check units yet"),
+            ),
+            pytest.param(unit_registry.m, id="identical_unit"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "variation",
+        (
+            "data",
+            pytest.param(
+                "dims", marks=pytest.mark.xfail(reason="units in indexes not supported")
+            ),
+            "coords",
+        ),
+    )
+    @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
+    def test_comparisons(self, func, variation, unit, dtype):
+        data = np.linspace(0, 5, 10).astype(dtype)
+        coord = np.arange(len(data)).astype(dtype)
+
+        base_unit = unit_registry.m
+        quantity =
data * base_unit + x = coord * base_unit + y = coord * base_unit + + units = { + "data": (unit, base_unit, base_unit), + "dims": (base_unit, unit, base_unit), + "coords": (base_unit, base_unit, unit), + } + data_unit, dim_unit, coord_unit = units.get(variation) + + data_array = xr.DataArray( + data=quantity, coords={"x": x, "y": ("x", y)}, dims="x" + ) + + other = attach_units( + strip_units(data_array), + { + None: (data_unit, base_unit if quantity.check(data_unit) else None), + "x": (dim_unit, base_unit if x.check(dim_unit) else None), + "y": (coord_unit, base_unit if y.check(coord_unit) else None), + }, + ) + + # TODO: test dim coord once indexes leave units intact + # also, express this in terms of calls on the raw data array + # and then check the units + equal_arrays = ( + np.all(quantity == other.data) + and (np.all(x == other.x.data) or True) # dims can't be checked yet + and np.all(y == other.y.data) + ) + equal_units = ( + data_unit == unit_registry.m + and coord_unit == unit_registry.m + and dim_unit == unit_registry.m + ) + expected = equal_arrays and (func.name != "identical" or equal_units) + result = func(data_array, other) + + assert expected == result + + @pytest.mark.xfail(reason="blocked by `where`") + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param(unit_registry.cm, id="compatible_unit"), + pytest.param(unit_registry.m, id="identical_unit"), + ), + ) + def test_broadcast_like(self, unit, dtype): + array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa + array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa + + x1 = np.arange(2) * unit_registry.m + x2 = np.arange(2) * unit + y1 = np.array([0]) * unit_registry.m + y2 = np.arange(3) * unit + + arr1 = xr.DataArray(data=array1, coords={"x": x1, "y": y1}, dims=("x", "y")) + arr2 = xr.DataArray(data=array2, coords={"x": x2, "y": y2}, dims=("x", "y")) + + expected = attach_units( + strip_units(arr1).broadcast_like(strip_units(arr2)), extract_units(arr1) + ) + result = arr1.broadcast_like(arr2) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param(unit_registry.cm, id="compatible_unit"), + pytest.param(unit_registry.m, id="identical_unit"), + ), + ) + def test_broadcast_equals(self, unit, dtype): + left_array = np.ones(shape=(2, 2), dtype=dtype) * unit_registry.m + right_array = array_attach_units( + np.ones(shape=(2,), dtype=dtype), + unit, + convert_from=unit_registry.m if left_array.check(unit) else None, + ) + + left = xr.DataArray(data=left_array, dims=("x", "y")) + right = xr.DataArray(data=right_array, dims="x") + + expected = np.all(left_array == right_array[:, None]) + result = left.broadcast_equals(right) + + assert expected == result + + @pytest.mark.parametrize( + "func", + ( + method("pipe", lambda da: da * 10), + method("assign_coords", y2=("y", np.arange(10) * unit_registry.mm)), + method("assign_attrs", attr1="value"), + method("rename", x2="x_mm"), + method("swap_dims", {"x": "x2"}), + method( + "expand_dims", + dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, + axis=1, + ), + method("drop_sel", labels="x"), + method("reset_coords", names="x2"), + method("copy"), + pytest.param( + 
method("astype", np.float32), + marks=pytest.mark.xfail(reason="units get stripped"), + ), + pytest.param( + method("item", 1), marks=pytest.mark.xfail(reason="units get stripped") + ), + ), + ids=repr, + ) + def test_content_manipulation(self, func, dtype): + quantity = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) + * unit_registry.pascal + ) + x = np.arange(quantity.shape[0]) * unit_registry.m + y = np.arange(quantity.shape[1]) * unit_registry.m + x2 = x.to(unit_registry.mm) + + data_array = xr.DataArray( + name="data", + data=quantity, + coords={"x": x, "x2": ("x", x2), "y": y}, + dims=("x", "y"), + ) + + stripped_kwargs = { + key: array_strip_units(value) for key, value in func.kwargs.items() + } + expected = attach_units( + func(strip_units(data_array), **stripped_kwargs), + { + "data": quantity.units, + "x": x.units, + "x_mm": x2.units, + "x2": x2.units, + "y": y.units, + }, + ) + result = func(data_array) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + pytest.param( + method("drop_sel", labels=dict(x=np.array([1, 5]))), + marks=pytest.mark.xfail( + reason="selecting using incompatible units does not raise" + ), + ), + pytest.param(method("copy", data=np.arange(20))), + ), + ids=repr, + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_content_manipulation_with_units(self, func, unit, error, dtype): + quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal + x = np.arange(len(quantity)) * unit_registry.m + + data_array = xr.DataArray(name="data", data=quantity, coords={"x": x}, dims="x") + + kwargs = { + key: (value * unit if isinstance(value, np.ndarray) else value) + for key, value in func.kwargs.items() + } + stripped_kwargs = func.kwargs + + expected = attach_units( + func(strip_units(data_array), **stripped_kwargs), + {"data": quantity.units if func.name == "drop_sel" else unit, "x": x.units}, + ) + if error is not None and func.name == "drop_sel": + with pytest.raises(error): + func(data_array, **kwargs) + else: + result = func(data_array, **kwargs) + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "indices", + ( + pytest.param(4, id="single index"), + pytest.param([5, 2, 9, 1], id="multiple indices"), + ), + ) + def test_isel(self, indices, dtype): + array = np.arange(10).astype(dtype) * unit_registry.s + x = np.arange(len(array)) * unit_registry.m + + data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + + expected = attach_units( + strip_units(data_array).isel(x=indices), + {"data": unit_registry.s, "x": unit_registry.m}, + ) + result = data_array.isel(x=indices) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( + reason="xarray does not support duck arrays in dimension coordinates" + ) + @pytest.mark.parametrize( + "values", + ( + pytest.param(12, id="single value"), + pytest.param([10, 5, 13], id="list of multiple values"), + pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"), + ), + ) + @pytest.mark.parametrize( + "units,error", + ( + pytest.param(1, KeyError, id="no units"), + pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), + 
pytest.param(unit_registry.degree, KeyError, id="incorrect unit"), + pytest.param(unit_registry.s, None, id="correct unit"), + ), + ) + def test_sel(self, values, units, error, dtype): + array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(len(array)) * unit_registry.s + data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + + values_with_units = values * units + + if error is not None: + with pytest.raises(error): + data_array.sel(x=values_with_units) + else: + result_array = array[values] + result_data_array = data_array.sel(x=values_with_units) + assert_equal_with_units(result_array, result_data_array) + + @pytest.mark.xfail( + reason="xarray does not support duck arrays in dimension coordinates" + ) + @pytest.mark.parametrize( + "values", + ( + pytest.param(12, id="single value"), + pytest.param([10, 5, 13], id="list of multiple values"), + pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"), + ), + ) + @pytest.mark.parametrize( + "units,error", + ( + pytest.param(1, KeyError, id="no units"), + pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), + pytest.param(unit_registry.degree, KeyError, id="incorrect unit"), + pytest.param(unit_registry.s, None, id="correct unit"), + ), + ) + def test_loc(self, values, units, error, dtype): + array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(len(array)) * unit_registry.s + data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + + values_with_units = values * units + + if error is not None: + with pytest.raises(error): + data_array.loc[values_with_units] + else: + result_array = array[values] + result_data_array = data_array.loc[values_with_units] + assert_equal_with_units(result_array, result_data_array) + + @pytest.mark.xfail(reason="tries to coerce using asarray") + @pytest.mark.parametrize( + "shape", + ( + pytest.param((10, 20), id="nothing squeezable"), + pytest.param((10, 20, 1), id="last dimension squeezable"), + pytest.param((10, 1, 20), id="middle dimension squeezable"), + pytest.param((1, 10, 20), id="first dimension squeezable"), + pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"), + ), + ) + def test_squeeze(self, shape, dtype): + names = "xyzt" + coords = { + name: np.arange(length).astype(dtype) + * (unit_registry.m if name != "t" else unit_registry.s) + for name, length in zip(names, shape) + } + array = np.arange(10 * 20).astype(dtype).reshape(shape) * unit_registry.J + data_array = xr.DataArray( + data=array, coords=coords, dims=tuple(names[: len(shape)]) + ) + + result_array = array.squeeze() + result_data_array = data_array.squeeze() + assert_equal_with_units(result_array, result_data_array) + + # try squeezing the dimensions separately + names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) + for index, name in enumerate(names): + assert_equal_with_units( + np.squeeze(array, axis=index), data_array.squeeze(dim=name) + ) + + @pytest.mark.xfail( + reason="indexes strip units and head / tail / thin only support integers" + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + (method("head", 
x=7, y=3), method("tail", x=7, y=3), method("thin", x=7, y=3)),
+        ids=repr,
+    )
+    def test_head_tail_thin(self, func, unit, error, dtype):
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+
+        coords = {
+            "x": np.arange(10) * unit_registry.m,
+            "y": np.arange(5) * unit_registry.m,
+        }
+
+        arr = xr.DataArray(data=array, coords=coords, dims=("x", "y"))
+
+        kwargs = {name: value * unit for name, value in func.kwargs.items()}
+
+        if error is not None:
+            with pytest.raises(error):
+                func(arr, **kwargs)
+
+            return
+
+        expected = attach_units(func(strip_units(arr)), extract_units(arr))
+        result = func(arr, **kwargs)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_interp(self, unit, error):
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        new_coords = (np.arange(10) + 0.5) * unit
+        coords = {
+            "x": np.arange(10) * unit_registry.m,
+            "y": np.arange(5) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.interp(x=new_coords)
+        else:
+            new_coords_ = (
+                new_coords.magnitude if hasattr(new_coords, "magnitude") else new_coords
+            )
+            result_array = strip_units(data_array).interp(x=new_coords_)
+            result_data_array = data_array.interp(x=new_coords)
+
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(reason="tries to coerce using asarray")
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_interp_like(self, unit, error):
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        coords = {
+            "x": (np.arange(10) + 0.3) * unit_registry.m,
+            "y": (np.arange(5) + 0.3) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+        new_data_array = xr.DataArray(
+            data=np.empty((20, 10)),
+            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
+            dims=("x", "y"),
+        )
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.interp_like(new_data_array)
+        else:
+            result_array = (
+                xr.DataArray(
+                    data=array.magnitude,
+                    coords={name: value.magnitude for name, value in coords.items()},
+                    dims=("x", "y"),
+                ).interp_like(strip_units(new_data_array))
+                * unit_registry.degK
+            )
+            result_data_array = data_array.interp_like(new_data_array)
+
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(
+        reason="pint does not implement np.result_type in __array_function__ yet"
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_reindex(self, unit,
error):
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        new_coords = (np.arange(10) + 0.5) * unit
+        coords = {
+            "x": np.arange(10) * unit_registry.m,
+            "y": np.arange(5) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.reindex(x=new_coords)
+        else:
+            new_coords_ = (
+                new_coords.magnitude if hasattr(new_coords, "magnitude") else new_coords
+            )
+            result_array = strip_units(data_array).reindex(x=new_coords_)
+            result_data_array = data_array.reindex(x=new_coords)
+
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(
+        reason="pint does not implement np.result_type in __array_function__ yet"
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_reindex_like(self, unit, error):
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        coords = {
+            "x": (np.arange(10) + 0.3) * unit_registry.m,
+            "y": (np.arange(5) + 0.3) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+        new_data_array = xr.DataArray(
+            data=np.empty((20, 10)),
+            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
+            dims=("x", "y"),
+        )
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.reindex_like(new_data_array)
+        else:
+            expected = attach_units(
+                strip_units(data_array).reindex_like(strip_units(new_data_array)),
+                {
+                    "data": unit_registry.degK,
+                    "x": unit_registry.m,
+                    "y": unit_registry.m,
+                },
+            )
+            result = data_array.reindex_like(new_data_array)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (method("unstack"), method("reset_index", "z"), method("reorder_levels")),
+        ids=repr,
+    )
+    def test_stacking_stacked(self, func, dtype):
+        array = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
+        )
+        x = np.arange(array.shape[0])
+        y = np.arange(array.shape[1])
+
+        data_array = xr.DataArray(
+            name="data", data=array, coords={"x": x, "y": y}, dims=("x", "y")
+        )
+        stacked = data_array.stack(z=("x", "y"))
+
+        expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m})
+        result = func(stacked)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="indexes strip the label units")
+    def test_to_unstacked_dataset(self, dtype):
+        array = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype)
+            * unit_registry.pascal
+        )
+        x = np.arange(array.shape[0]) * unit_registry.m
+        y = np.arange(array.shape[1]) * unit_registry.s
+
+        data_array = xr.DataArray(
+            data=array, coords={"x": x, "y": y}, dims=("x", "y")
+        ).stack(z=("x", "y"))
+
+        func = method("to_unstacked_dataset", dim="z")
+
+        expected = attach_units(
+            func(strip_units(data_array)),
+            {"y": y.units, **dict(zip(x.magnitude, [array.units] * len(x)))},
+        ).rename({elem.magnitude: elem for elem in x})
         result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
     @pytest.mark.parametrize(
         "func",
         (
-            pytest.param(lambda x: 2 * x, id="multiply"),
-            pytest.param(lambda x: x + x, id="add"),
-            pytest.param(lambda x: x[0] + x, id="add
scalar"), + method("transpose", "y", "x", "z"), + method("stack", a=("x", "y")), + method("set_index", x="x2"), + pytest.param( + method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") + ), + pytest.param( + method("roll", x=2, roll_coords=False), + marks=pytest.mark.xfail(reason="strips units"), + ), + method("sortby", "x2"), + ), + ids=repr, + ) + def test_stacking_reordering(self, func, dtype): + array = ( + np.linspace(0, 10, 2 * 5 * 10).reshape(2, 5, 10).astype(dtype) + * unit_registry.m + ) + x = np.arange(array.shape[0]) + y = np.arange(array.shape[1]) + z = np.arange(array.shape[2]) + x2 = np.linspace(0, 1, array.shape[0])[::-1] + + data_array = xr.DataArray( + name="data", + data=array, + coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, + dims=("x", "y", "z"), + ) + + expected = attach_units( + func(strip_units(data_array)), {"data": unit_registry.m} + ) + result = func(data_array) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + method("diff", dim="x"), + method("differentiate", coord="x"), + method("integrate", dim="x"), + pytest.param( + method("quantile", q=[0.25, 0.75]), + marks=pytest.mark.xfail( + reason="pint does not implement nanpercentile yet" + ), + ), + pytest.param( + method("reduce", func=np.sum, dim="x"), + marks=pytest.mark.xfail(reason="strips units"), + ), + pytest.param( + lambda x: x.dot(x), + id="method_dot", + marks=pytest.mark.xfail(reason="pint does not implement einsum"), + ), + ), + ids=repr, + ) + def test_computation(self, func, dtype): + array = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + ) + + x = np.arange(array.shape[0]) * unit_registry.m + y = np.arange(array.shape[1]) * unit_registry.s + + data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + units = extract_units(data_array) + + expected = attach_units(func(strip_units(data_array)), units) + result = func(data_array) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize( + "func", + ( + pytest.param( + method("groupby", "y"), marks=pytest.mark.xfail(reason="strips units") + ), + pytest.param( + method("groupby_bins", "y", bins=4), + marks=pytest.mark.xfail(reason="strips units"), + ), + method("coarsen", y=2), + pytest.param( + method("rolling", y=3), marks=pytest.mark.xfail(reason="strips units") + ), + pytest.param( + method("rolling_exp", y=3), + marks=pytest.mark.xfail(reason="strips units"), + ), + ), + ids=repr, + ) + def test_computation_objects(self, func, dtype): + array = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + ) + + x = np.arange(array.shape[0]) * unit_registry.m + y = np.arange(array.shape[1]) * 3 * unit_registry.s + + data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + units = extract_units(data_array) + + expected = attach_units(func(strip_units(data_array)).mean(), units) + result = func(data_array).mean() + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="strips units") + def test_resample(self, dtype): + array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + + time = pd.date_range("10-09-2010", periods=len(array), freq="1y") + data_array = xr.DataArray(data=array, coords={"time": time}, dims="time") + units = extract_units(data_array) + + func = method("resample", time="6m") + + expected = attach_units(func(strip_units(data_array)).mean(), units) + result = func(data_array).mean() + + assert_equal_with_units(expected, result) + + 
@pytest.mark.parametrize( + "func", + ( + pytest.param( + method("assign_coords", {"z": (["x"], np.arange(5) * unit_registry.s)}), + marks=pytest.mark.xfail(reason="strips units"), + ), + pytest.param(method("first")), + pytest.param(method("last")), pytest.param( - lambda x: x.T @ x, - id="matrix multiply", - marks=pytest.mark.xfail( - reason="pint does not support matrix multiplication yet" - ), + method("quantile", q=[0.25, 0.5, 0.75], dim="x"), + marks=pytest.mark.xfail(reason="strips units"), ), ), + ids=repr, ) - def test_binary_operations(self, func, dtype): - array = np.arange(10).astype(dtype) * unit_registry.m - data_array = xr.DataArray(data=array) + def test_grouped_operations(self, func, dtype): + array = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + ) - expected = xr.DataArray(data=func(array)) - result = func(data_array) + x = np.arange(array.shape[0]) * unit_registry.m + y = np.arange(array.shape[1]) * 3 * unit_registry.s + + data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + units = extract_units(data_array) + + expected = attach_units(func(strip_units(data_array).groupby("y")), units) + result = func(data_array.groupby("y")) assert_equal_with_units(expected, result) + +class TestDataset: @pytest.mark.parametrize( - "comparison", + "unit,error", ( - pytest.param(operator.lt, id="less_than"), - pytest.param(operator.ge, id="greater_equal"), - pytest.param(operator.eq, id="equal"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="same_unit"), ), ) @pytest.mark.parametrize( - "unit,error", + "shared", ( - pytest.param(1, ValueError, id="without_unit"), + "nothing", pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + "dims", + marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), + ), + pytest.param( + "coords", + marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), ), - pytest.param(unit_registry.s, DimensionalityError, id="incorrect_unit"), - pytest.param(unit_registry.m, None, id="correct_unit"), ), ) - def test_comparison_operations(self, comparison, unit, error, dtype): - array = ( - np.array([10.1, 5.2, 6.5, 8.0, 21.3, 7.1, 1.3]).astype(dtype) - * unit_registry.m - ) - data_array = xr.DataArray(data=array) + def test_init(self, shared, unit, error, dtype): + original_unit = unit_registry.m + scaled_unit = unit_registry.mm - value = 8 - to_compare_with = value * unit + a = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa + b = np.linspace(-1, 0, 12).astype(dtype) * unit_registry.Pa - # incompatible units are all not equal - if error is not None and comparison is not operator.eq: - with pytest.raises(error): - comparison(array, to_compare_with) + raw_x = np.arange(a.shape[0]) + x = raw_x * original_unit + x2 = x.to(scaled_unit) - with pytest.raises(error): - comparison(data_array, to_compare_with) + raw_y = np.arange(b.shape[0]) + y = raw_y * unit + y_units = unit if isinstance(y, unit_registry.Quantity) else None + if isinstance(y, unit_registry.Quantity): + if y.check(scaled_unit): + y2 = y.to(scaled_unit) + else: + y2 = y * 1000 + y2_units = y2.units else: - result = comparison(data_array, to_compare_with) - # pint compares incompatible arrays to False, so 
we need to extend - # the multiplication works for both scalar and array results - expected = xr.DataArray( - data=comparison(array, to_compare_with) - * np.ones_like(array, dtype=bool) - ) + y2 = y * 1000 + y2_units = None - assert_equal_with_units(expected, result) + variants = { + "nothing": ({"x": x, "x2": ("x", x2)}, {"y": y, "y2": ("y", y2)}), + "dims": ( + {"x": x, "x2": ("x", strip_units(x2))}, + {"x": y, "y2": ("x", strip_units(y2))}, + ), + "coords": ({"x": raw_x, "y": ("x", x2)}, {"x": raw_y, "y": ("x", y2)}), + } + coords_a, coords_b = variants.get(shared) + + dims_a, dims_b = ("x", "y") if shared == "nothing" else ("x", "x") + + arr1 = xr.DataArray(data=a, coords=coords_a, dims=dims_a) + arr2 = xr.DataArray(data=b, coords=coords_b, dims=dims_b) + if error is not None and shared != "nothing": + with pytest.raises(error): + xr.Dataset(data_vars={"a": arr1, "b": arr2}) + + return + + result = xr.Dataset(data_vars={"a": arr1, "b": arr2}) + + expected_units = { + "a": a.units, + "b": b.units, + "x": x.units, + "x2": x2.units, + "y": y_units, + "y2": y2_units, + } + expected = attach_units( + xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}), + expected_units, + ) + assert_equal_with_units(result, expected) @pytest.mark.parametrize( - "units,error", + "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) + ) + @pytest.mark.parametrize( + "variant", ( - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.m, DimensionalityError, id="incorrect unit"), - pytest.param(unit_registry.degree, None, id="correct unit"), + pytest.param( + "with_dims", + marks=pytest.mark.xfail(reason="units in indexes are not supported"), + ), + pytest.param("with_coords"), + pytest.param("without_coords"), ), ) - def test_univariate_ufunc(self, units, error, dtype): - array = np.arange(10).astype(dtype) * units - data_array = xr.DataArray(data=array) + @pytest.mark.filterwarnings("error:::pint[.*]") + def test_repr(self, func, variant, dtype): + array1 = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.Pa + array2 = np.linspace(0, 1, 10, dtype=dtype) * unit_registry.degK - if error is not None: - with pytest.raises(error): - np.sin(data_array) - else: - expected = xr.DataArray(data=np.sin(array)) - result = np.sin(data_array) + x = np.arange(len(array1)) * unit_registry.s + y = x.to(unit_registry.ms) - assert_equal_with_units(expected, result) + variants = { + "with_dims": {"x": x}, + "with_coords": {"y": ("x", y)}, + "without_coords": {}, + } - @pytest.mark.xfail(reason="pint's implementation of `np.maximum` strips units") - def test_bivariate_ufunc(self, dtype): - unit = unit_registry.m - array = np.arange(10).astype(dtype) * unit - data_array = xr.DataArray(data=array) + data_array = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("x", array2)}, + coords=variants.get(variant), + ) - expected = xr.DataArray(np.maximum(array, 0 * unit)) + # FIXME: this just checks that the repr does not raise + # warnings or errors, but does not check the result + func(data_array) - assert_equal_with_units(expected, np.maximum(data_array, 0 * unit)) - assert_equal_with_units(expected, np.maximum(0 * unit, data_array)) + @pytest.mark.parametrize( + "func", + ( + pytest.param( + function("all"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + pytest.param( + function("any"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + function("argmax"), + function("argmin"), + function("max"), + function("min"), + 
function("mean"), + pytest.param( + function("median"), + marks=pytest.mark.xfail( + reason="np.median does not work with dataset yet" + ), + ), + pytest.param( + function("sum"), + marks=pytest.mark.xfail( + reason="np.result_type not implemented by pint" + ), + ), + pytest.param( + function("prod"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + function("std"), + function("var"), + function("cumsum"), + pytest.param( + function("cumprod"), + marks=pytest.mark.xfail( + reason="pint does not support cumprod on non-dimensionless yet" + ), + ), + pytest.param( + method("all"), marks=pytest.mark.xfail(reason="not implemented by pint") + ), + pytest.param( + method("any"), marks=pytest.mark.xfail(reason="not implemented by pint") + ), + method("argmax"), + method("argmin"), + method("max"), + method("min"), + method("mean"), + method("median"), + pytest.param( + method("sum"), + marks=pytest.mark.xfail( + reason="np.result_type not implemented by pint" + ), + ), + pytest.param( + method("prod"), + marks=pytest.mark.xfail(reason="not implemented by pint"), + ), + method("std"), + method("var"), + method("cumsum"), + pytest.param( + method("cumprod"), + marks=pytest.mark.xfail( + reason="pint does not support cumprod on non-dimensionless yet" + ), + ), + ), + ids=repr, + ) + def test_aggregation(self, func, dtype): + unit_a = unit_registry.Pa + unit_b = unit_registry.kg / unit_registry.m ** 3 + a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x") + b = xr.DataArray(data=np.linspace(-1, 0, 10).astype(dtype) * unit_b, dims="x") + x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, dims="x") + y = xr.DataArray( + data=np.arange(10, 20).astype(dtype) * unit_registry.s, dims="x" + ) - @pytest.mark.parametrize("property", ("T", "imag", "real")) - def test_numpy_properties(self, property, dtype): - array = ( - np.arange(5 * 10).astype(dtype) - + 1j * np.linspace(-1, 0, 5 * 10).astype(dtype) - ).reshape(5, 10) * unit_registry.s - data_array = xr.DataArray(data=array, dims=("x", "y")) + ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y}) - expected = xr.DataArray( - data=getattr(array, property), - dims=("x", "y")[:: 1 if property != "T" else -1], + result = func(ds) + expected = attach_units( + func(strip_units(ds)), + {"a": array_extract_units(func(a)), "b": array_extract_units(func(b))}, ) - result = getattr(data_array, property) - assert_equal_with_units(expected, result) + assert_equal_with_units(result, expected) + + @pytest.mark.parametrize("property", ("imag", "real")) + def test_numpy_properties(self, property, dtype): + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray( + data=np.linspace(0, 1, 10) * unit_registry.Pa, dims="x" + ), + "b": xr.DataArray( + data=np.linspace(-1, 0, 15) * unit_registry.Pa, dims="y" + ), + }, + coords={ + "x": np.arange(10) * unit_registry.m, + "y": np.arange(15) * unit_registry.s, + }, + ) + units = extract_units(ds) + + result = getattr(ds, property) + expected = attach_units(getattr(strip_units(ds), property), units) + + assert_equal_with_units(result, expected) @pytest.mark.parametrize( "func", ( + method("astype", float), method("conj"), method("argsort"), method("conjugate"), @@ -647,17 +2861,33 @@ def test_numpy_properties(self, property, dtype): ids=repr, ) def test_numpy_methods(self, func, dtype): - array = np.arange(10).astype(dtype) * unit_registry.m - data_array = xr.DataArray(data=array, dims="x") + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray( + data=np.linspace(1, 
-1, 10) * unit_registry.Pa, dims="x" + ), + "b": xr.DataArray( + data=np.linspace(-1, 1, 15) * unit_registry.Pa, dims="y" + ), + }, + coords={ + "x": np.arange(10) * unit_registry.m, + "y": np.arange(15) * unit_registry.s, + }, + ) + units = { + "a": array_extract_units(func(ds.a)), + "b": array_extract_units(func(ds.b)), + "x": unit_registry.m, + "y": unit_registry.s, + } - expected = xr.DataArray(func(array), dims="x") - result = func(data_array) + result = func(ds) + expected = attach_units(func(strip_units(ds)), units) - assert_equal_with_units(expected, result) + assert_equal_with_units(result, expected) - @pytest.mark.parametrize( - "func", (method("clip", min=3, max=8), method("searchsorted", v=5)), ids=repr - ) + @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr) @pytest.mark.parametrize( "unit,error", ( @@ -671,8 +2901,29 @@ def test_numpy_methods(self, func, dtype): ), ) def test_numpy_methods_with_args(self, func, unit, error, dtype): - array = np.arange(10).astype(dtype) * unit_registry.m - data_array = xr.DataArray(data=array) + data_unit = unit_registry.m + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=np.arange(10) * data_unit, dims="x"), + "b": xr.DataArray(data=np.arange(15) * data_unit, dims="y"), + }, + coords={ + "x": np.arange(10) * unit_registry.m, + "y": np.arange(15) * unit_registry.s, + }, + ) + units = extract_units(ds) + + def strip(value): + return ( + value.magnitude if isinstance(value, unit_registry.Quantity) else value + ) + + def convert(value, to): + if isinstance(value, unit_registry.Quantity) and value.check(to): + return value.to(to) + + return value scalar_types = (int, float) kwargs = { @@ -680,25 +2931,26 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): for key, value in func.kwargs.items() } + stripped_kwargs = { + key: strip(convert(value, data_unit)) for key, value in kwargs.items() + } + if error is not None: with pytest.raises(error): - func(data_array, **kwargs) - else: - expected = func(array, **kwargs) - if func.name not in ["searchsorted"]: - expected = xr.DataArray(data=expected) - result = func(data_array, **kwargs) + func(ds, **kwargs) - if func.name in ["searchsorted"]: - assert np.allclose(expected, result) - else: - assert_equal_with_units(expected, result) + return + + result = func(ds, **kwargs) + expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) + + assert_equal_with_units(result, expected) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr ) def test_missing_value_detection(self, func, dtype): - array = ( + array1 = ( np.array( [ [1.4, 2.3, np.nan, 7.2], @@ -709,41 +2961,84 @@ def test_missing_value_detection(self, func, dtype): ) * unit_registry.degK ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.m + array2 = ( + np.array( + [ + [np.nan, 5.7, 12.0, 7.2], + [np.nan, 12.4, np.nan, 4.2], + [9.8, np.nan, 4.6, 1.4], + [7.2, np.nan, 6.3, np.nan], + [8.4, 3.9, np.nan, np.nan], + ] + ) + * unit_registry.Pa + ) - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + x = np.arange(array1.shape[0]) * unit_registry.m + y = np.arange(array1.shape[1]) * unit_registry.m + z = np.arange(array2.shape[0]) * unit_registry.m - expected = func(strip_units(data_array)) - result = func(data_array) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("z", "x")), + }, + coords={"x": 
x, "y": y, "z": z}, + ) + + expected = func(strip_units(ds)) + result = func(ds) assert_equal_with_units(expected, result) - @pytest.mark.xfail(reason="ffill and bfill lose units in data") + @pytest.mark.xfail(reason="ffill and bfill lose the unit") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) def test_missing_value_filling(self, func, dtype): - array = ( + array1 = ( np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * unit_registry.degK ) - x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) - result_without_units = func(strip_units(data_array), dim="x") - result = xr.DataArray( - data=result_without_units.data * unit_registry.degK, + x = np.arange(len(array1)) + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, coords={"x": x}, - dims=["x"], ) expected = attach_units( - func(strip_units(data_array), dim="x"), {"data": unit_registry.degK} + func(strip_units(ds), dim="x"), + {"a": unit_registry.degK, "b": unit_registry.Pa}, ) - result = func(data_array, dim="x") + result = func(ds, dim="x") assert_equal_with_units(expected, result) @pytest.mark.xfail(reason="fillna drops the unit") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param( + 1, + DimensionalityError, + id="no_unit", + marks=pytest.mark.xfail(reason="blocked by the failing `where`"), + ), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) @pytest.mark.parametrize( "fill_value", ( @@ -758,30 +3053,59 @@ def test_missing_value_filling(self, func, dtype): pytest.param(np.array([-1]), id="numpy array"), ), ) - def test_fillna(self, fill_value, dtype): - unit = unit_registry.m - array = np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * unit - data_array = xr.DataArray(data=array) + def test_fillna(self, fill_value, unit, error, dtype): + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.m + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.m + ) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + } + ) + + if error is not None: + with pytest.raises(error): + ds.fillna(value=fill_value * unit) + return + + result = ds.fillna(value=fill_value * unit) expected = attach_units( - strip_units(data_array).fillna(value=fill_value), {"data": unit} + strip_units(ds).fillna(value=fill_value), + {"a": unit_registry.m, "b": unit_registry.m}, ) - result = data_array.fillna(value=fill_value * unit) assert_equal_with_units(expected, result) def test_dropna(self, dtype): - array = ( + array1 = ( np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) - * unit_registry.m + * unit_registry.degK + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, ) - x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": 
x}, dims=["x"]) expected = attach_units( - strip_units(data_array).dropna(dim="x"), {"data": unit_registry.m} + strip_units(ds).dropna(dim="x"), + {"a": unit_registry.degK, "b": unit_registry.Pa}, ) - result = data_array.dropna(dim="x") + result = ds.dropna(dim="x") assert_equal_with_units(expected, result) @@ -797,32 +3121,57 @@ def test_dropna(self, dtype): ), ) def test_isin(self, unit, dtype): - array = ( + array1 = ( np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * unit_registry.m ) - data_array = xr.DataArray(data=array, dims="x") + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.m + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype) values = raw_values * unit - result_without_units = strip_units(data_array).isin(raw_values) - if unit != unit_registry.m: - result_without_units[:] = False - result_with_units = data_array.isin(values) + if ( + isinstance(values, unit_registry.Quantity) + and values.check(unit_registry.m) + and unit != unit_registry.m + ): + raw_values = values.to(unit_registry.m).magnitude - assert_equal_with_units(result_without_units, result_with_units) + expected = strip_units(ds).isin(raw_values) + if not isinstance(values, unit_registry.Quantity) or not values.check( + unit_registry.m + ): + expected.a[:] = False + expected.b[:] = False + result = ds.isin(values) + + assert_equal_with_units(result, expected) @pytest.mark.parametrize( "variant", ( pytest.param( "masking", - marks=pytest.mark.xfail(reason="nan not compatible with quantity"), + marks=pytest.mark.xfail( + reason="np.result_type not implemented by quantity" + ), ), pytest.param( "replacing_scalar", - marks=pytest.mark.xfail(reason="scalar not convertible using astype"), + marks=pytest.mark.xfail( + reason="python scalar not convertible using astype" + ), ), pytest.param( "replacing_array", @@ -853,11 +3202,18 @@ def _strip_units(mapping): return {key: array_strip_units(value) for key, value in mapping.items()} original_unit = unit_registry.m - array = np.linspace(0, 1, 10).astype(dtype) * original_unit + array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit + array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit - data_array = xr.DataArray(data=array) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": np.arange(len(array1))}, + ) - condition = data_array < 0.5 * original_unit + condition = ds < 0.5 * original_unit other = np.linspace(-2, -1, 10).astype(dtype) * unit variant_kwargs = { "masking": {"cond": condition}, @@ -870,33 +3226,46 @@ def _strip_units(mapping): if variant not in ("masking", "dropping") and error is not None: with pytest.raises(error): - data_array.where(**kwargs) - else: - expected = attach_units( - strip_units(array).where(**kwargs_without_units), - {"data": original_unit}, - ) - result = data_array.where(**kwargs) + ds.where(**kwargs) - assert_equal_with_units(expected, result) + return + + expected = attach_units( + strip_units(ds).where(**kwargs_without_units), + {"a": original_unit, "b": original_unit}, + ) + result = ds.where(**kwargs) + + assert_equal_with_units(expected, result) @pytest.mark.xfail(reason="interpolate strips units") def test_interpolate_na(self, dtype): - array = ( - np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, 
np.nan, 9.1]) - * unit_registry.m + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.degK + ) + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, ) - x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype) expected = attach_units( - strip_units(data_array).interpolate_na(dim="x"), {"data": unit_registry.m} + strip_units(ds).interpolate_na(dim="x"), + {"a": unit_registry.degK, "b": unit_registry.Pa}, ) - result = data_array.interpolate_na(dim="x") + result = ds.interpolate_na(dim="x") assert_equal_with_units(expected, result) - @pytest.mark.xfail(reason="uses DataArray.where, which currently fails") + @pytest.mark.xfail(reason="uses Dataset.where, which currently fails") @pytest.mark.parametrize( "unit,error", ( @@ -906,31 +3275,49 @@ def test_interpolate_na(self, dtype): ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + pytest.param(unit_registry.m, None, id="same_unit"), ), ) def test_combine_first(self, unit, error, dtype): - array = np.zeros(shape=(2, 2), dtype=dtype) * unit_registry.m - other_array = np.ones_like(array) * unit - - data_array = xr.DataArray( - data=array, coords={"x": ["a", "b"], "y": [-1, 0]}, dims=["x", "y"] + array1 = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * unit_registry.degK ) - other = xr.DataArray( - data=other_array, coords={"x": ["b", "c"], "y": [0, 1]}, dims=["x", "y"] + array2 = ( + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(len(array1)) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + other_array1 = np.ones_like(array1) * unit + other_array2 = -1 * np.ones_like(array2) * unit + other = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=other_array1, dims="x"), + "b": xr.DataArray(data=other_array2, dims="x"), + }, + coords={"x": np.arange(array1.shape[0])}, ) if error is not None: with pytest.raises(error): - data_array.combine_first(other) - else: - expected = attach_units( - strip_units(data_array).combine_first(strip_units(other)), - {"data": unit_registry.m}, - ) - result = data_array.combine_first(other) + ds.combine_first(other) - assert_equal_with_units(expected, result) + return + + expected = attach_units( + strip_units(ds).combine_first(strip_units(other)), + {"a": unit_registry.m, "b": unit_registry.m}, + ) + result = ds.combine_first(other) + + assert_equal_with_units(expected, result) @pytest.mark.parametrize( "unit", @@ -958,31 +3345,39 @@ def test_combine_first(self, unit, error, dtype): ) @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) def test_comparisons(self, func, variation, unit, dtype): - data = np.linspace(0, 5, 10).astype(dtype) - coord = np.arange(len(data)).astype(dtype) + array1 = np.linspace(0, 5, 10).astype(dtype) + array2 = np.linspace(-5, 0, 10).astype(dtype) - base_unit = unit_registry.m - quantity = data * base_unit - x = coord * base_unit - y = coord * base_unit + coord = np.arange(len(array1)).astype(dtype) + + original_unit = 
unit_registry.m + quantity1 = array1 * original_unit + quantity2 = array2 * original_unit + x = coord * original_unit + y = coord * original_unit units = { - "data": (unit, base_unit, base_unit), - "dims": (base_unit, unit, base_unit), - "coords": (base_unit, base_unit, unit), + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), } data_unit, dim_unit, coord_unit = units.get(variation) - data_array = xr.DataArray( - data=quantity, coords={"x": x, "y": ("x", y)}, dims="x" + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=quantity1, dims="x"), + "b": xr.DataArray(data=quantity2, dims="x"), + }, + coords={"x": x, "y": ("x", y)}, ) other = attach_units( - strip_units(data_array), + strip_units(ds), { - None: (data_unit, base_unit if quantity.check(data_unit) else None), - "x": (dim_unit, base_unit if x.check(dim_unit) else None), - "y": (coord_unit, base_unit if y.check(coord_unit) else None), + "a": (data_unit, original_unit if quantity1.check(data_unit) else None), + "b": (data_unit, original_unit if quantity2.check(data_unit) else None), + "x": (dim_unit, original_unit if x.check(dim_unit) else None), + "y": (coord_unit, original_unit if y.check(coord_unit) else None), }, ) @@ -990,20 +3385,55 @@ def test_comparisons(self, func, variation, unit, dtype): # also, express this in terms of calls on the raw data array # and then check the units equal_arrays = ( - np.all(quantity == other.data) + np.all(ds.a.data == other.a.data) + and np.all(ds.b.data == other.b.data) and (np.all(x == other.x.data) or True) # dims can't be checked yet and np.all(y == other.y.data) ) equal_units = ( - data_unit == unit_registry.m - and coord_unit == unit_registry.m - and dim_unit == unit_registry.m + data_unit == original_unit + and coord_unit == original_unit + and dim_unit == original_unit ) expected = equal_arrays and (func.name != "identical" or equal_units) - result = func(data_array, other) + result = func(ds, other) assert expected == result + @pytest.mark.xfail(reason="blocked by `where`") + @pytest.mark.parametrize( + "unit", + ( + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.s, id="incompatible_unit"), + pytest.param(unit_registry.cm, id="compatible_unit"), + pytest.param(unit_registry.m, id="identical_unit"), + ), + ) + def test_broadcast_like(self, unit, dtype): + array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa + array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa + + x1 = np.arange(2) * unit_registry.m + x2 = np.arange(2) * unit + y1 = np.array([0]) * unit_registry.m + y2 = np.arange(3) * unit + + ds1 = xr.Dataset( + data_vars={"a": (("x", "y"), array1)}, coords={"x": x1, "y": y1} + ) + ds2 = xr.Dataset( + data_vars={"a": (("x", "y"), array2)}, coords={"x": x2, "y": y2} + ) + + expected = attach_units( + strip_units(ds1).broadcast_like(strip_units(ds2)), extract_units(ds1) + ) + result = ds1.broadcast_like(ds2) + + assert_equal_with_units(expected, result) + @pytest.mark.parametrize( "unit", ( @@ -1015,128 +3445,147 @@ def test_comparisons(self, func, variation, unit, dtype): ), ) def test_broadcast_equals(self, unit, dtype): - left_array = np.ones(shape=(2, 2), dtype=dtype) * unit_registry.m - right_array = array_attach_units( + left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m + left_array2 = np.zeros(shape=(2, 6), dtype=dtype) * 
unit_registry.m + + right_array1 = array_attach_units( np.ones(shape=(2,), dtype=dtype), unit, - convert_from=unit_registry.m if left_array.check(unit) else None, + convert_from=unit_registry.m if left_array1.check(unit) else None, + ) + right_array2 = array_attach_units( + np.ones(shape=(2,), dtype=dtype), + unit, + convert_from=unit_registry.m if left_array2.check(unit) else None, ) - left = xr.DataArray(data=left_array, dims=("x", "y")) - right = xr.DataArray(data=right_array, dims="x") + left = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=left_array1, dims=("x", "y")), + "b": xr.DataArray(data=left_array2, dims=("x", "z")), + } + ) + right = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=right_array1, dims="x"), + "b": xr.DataArray(data=right_array2, dims="x"), + } + ) - expected = np.all(left_array == right_array[:, None]) + expected = np.all(left_array1 == right_array1[:, None]) and np.all( + left_array2 == right_array2[:, None] + ) result = left.broadcast_equals(right) assert expected == result @pytest.mark.parametrize( "func", - ( - method("pipe", lambda da: da * 10), - method("assign_coords", y2=("y", np.arange(10) * unit_registry.mm)), - method("assign_attrs", attr1="value"), - method("rename", x2="x_mm"), - method("swap_dims", {"x": "x2"}), - method( - "expand_dims", - dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, - axis=1, - ), - method("drop", labels="x"), - method("reset_coords", names="x2"), - method("copy"), - pytest.param( - method("astype", np.float32), - marks=pytest.mark.xfail(reason="units get stripped"), - ), - pytest.param( - method("item", 1), marks=pytest.mark.xfail(reason="units get stripped") - ), - ), + (method("unstack"), method("reset_index", "v"), method("reorder_levels")), ids=repr, ) - def test_content_manipulation(self, func, dtype): - quantity = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) - * unit_registry.pascal + def test_stacking_stacked(self, func, dtype): + array1 = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + ) + array2 = ( + np.linspace(-10, 0, 5 * 10 * 15).reshape(5, 10, 15).astype(dtype) + * unit_registry.m ) - x = np.arange(quantity.shape[0]) * unit_registry.m - y = np.arange(quantity.shape[1]) * unit_registry.m - x2 = x.to(unit_registry.mm) - data_array = xr.DataArray( - name="data", - data=quantity, - coords={"x": x, "x2": ("x", x2), "y": y}, - dims=("x", "y"), + x = np.arange(array1.shape[0]) + y = np.arange(array1.shape[1]) + z = np.arange(array2.shape[2]) + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + }, + coords={"x": x, "y": y, "z": z}, ) - stripped_kwargs = { - key: array_strip_units(value) for key, value in func.kwargs.items() - } + stacked = ds.stack(v=("x", "y")) + expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), - { - "data": quantity.units, - "x": x.units, - "x_mm": x2.units, - "x2": x2.units, - "y": y.units, - }, + func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m} + ) + result = func(stacked) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="tries to subscript scalar quantities") + def test_to_stacked_array(self, dtype): + labels = np.arange(5).astype(dtype) * unit_registry.s + arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels} + + ds = xr.Dataset( + data_vars={ + name: xr.DataArray(data=array, dims="x") + for name, array in arrays.items() + } + ) + + func = 
method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"]) + + result = func(ds).rename(None) + expected = attach_units( + func(strip_units(ds)).rename(None), + {None: unit_registry.m, "y": unit_registry.s}, ) - result = func(data_array) assert_equal_with_units(expected, result) @pytest.mark.parametrize( "func", ( + method("transpose", "y", "x", "z1", "z2"), + method("stack", a=("x", "y")), + method("set_index", x="x2"), pytest.param( - method("drop_sel", labels=dict(x=np.array([1, 5]))), - marks=pytest.mark.xfail( - reason="selecting using incompatible units does not raise" - ), + method("shift", x=2), marks=pytest.mark.xfail(reason="sets all to nan") ), - pytest.param(method("copy", data=np.arange(20))), - ), - ids=repr, - ) - @pytest.mark.parametrize( - "unit,error", - ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("roll", x=2, roll_coords=False), + marks=pytest.mark.xfail(reason="strips units"), ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, KeyError, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("sortby", "x2"), ), + ids=repr, ) - def test_content_manipulation_with_units(self, func, unit, error, dtype): - quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal - x = np.arange(len(quantity)) * unit_registry.m + def test_stacking_reordering(self, func, dtype): + array1 = ( + np.linspace(0, 10, 2 * 5 * 10).reshape(2, 5, 10).astype(dtype) + * unit_registry.Pa + ) + array2 = ( + np.linspace(0, 10, 2 * 5 * 15).reshape(2, 5, 15).astype(dtype) + * unit_registry.degK + ) - data_array = xr.DataArray(name="data", data=quantity, coords={"x": x}, dims="x") + x = np.arange(array1.shape[0]) + y = np.arange(array1.shape[1]) + z1 = np.arange(array1.shape[2]) + z2 = np.arange(array2.shape[2]) - kwargs = { - key: (value * unit if isinstance(value, np.ndarray) else value) - for key, value in func.kwargs.items() - } - stripped_kwargs = func.kwargs + x2 = np.linspace(0, 1, array1.shape[0])[::-1] + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y", "z1")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z2")), + }, + coords={"x": x, "y": y, "z1": z1, "z2": z2, "x2": ("x", x2)}, + ) expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), - {"data": quantity.units if func.name == "drop_sel" else unit, "x": x.units}, + func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK} ) - if error is not None and func.name == "drop_sel": - with pytest.raises(error): - func(data_array, **kwargs) - else: - result = func(data_array, **kwargs) - assert_equal_with_units(expected, result) + result = func(ds) + + assert_equal_with_units(expected, result) + @pytest.mark.xfail(reason="indexes strip units") @pytest.mark.parametrize( "indices", ( @@ -1145,16 +3594,23 @@ def test_content_manipulation_with_units(self, func, unit, error, dtype): ), ) def test_isel(self, indices, dtype): - array = np.arange(10).astype(dtype) * unit_registry.s - x = np.arange(len(array)) * unit_registry.m - - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + array1 = np.arange(10).astype(dtype) * unit_registry.s + array2 = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa + + x = np.arange(len(array1)) * unit_registry.m + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": 
xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) expected = attach_units( - strip_units(data_array).isel(x=indices), - {"data": unit_registry.s, "x": unit_registry.m}, + strip_units(ds).isel(x=indices), + {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m}, ) - result = data_array.isel(x=indices) + result = ds.isel(x=indices) assert_equal_with_units(expected, result) @@ -1164,34 +3620,48 @@ def test_isel(self, indices, dtype): @pytest.mark.parametrize( "values", ( - pytest.param(12, id="single value"), - pytest.param([10, 5, 13], id="list of multiple values"), - pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"), + pytest.param(12, id="single_value"), + pytest.param([10, 5, 13], id="list_of_values"), + pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( "units,error", ( - pytest.param(1, KeyError, id="no units"), + pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), - pytest.param(unit_registry.degree, KeyError, id="incorrect unit"), - pytest.param(unit_registry.s, None, id="correct unit"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.ms, KeyError, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="same_unit"), ), ) def test_sel(self, values, units, error, dtype): - array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m - x = np.arange(len(array)) * unit_registry.s - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK + array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa + x = np.arange(len(array1)) * unit_registry.s + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) values_with_units = values * units if error is not None: with pytest.raises(error): - data_array.sel(x=values_with_units) - else: - result_array = array[values] - result_data_array = data_array.sel(x=values_with_units) - assert_equal_with_units(result_array, result_data_array) + ds.sel(x=values_with_units) + + return + + expected = attach_units( + strip_units(ds).sel(x=values), + {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s}, + ) + result = ds.sel(x=values_with_units) + assert_equal_with_units(expected, result) @pytest.mark.xfail( reason="xarray does not support duck arrays in dimension coordinates" @@ -1207,28 +3677,96 @@ def test_sel(self, values, units, error, dtype): @pytest.mark.parametrize( "units,error", ( - pytest.param(1, KeyError, id="no units"), + pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), - pytest.param(unit_registry.degree, KeyError, id="incorrect unit"), - pytest.param(unit_registry.s, None, id="correct unit"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.ms, KeyError, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="same_unit"), ), ) def test_loc(self, values, units, error, dtype): - array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m - x = np.arange(len(array)) * unit_registry.s - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK + array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa + x = np.arange(len(array1)) * 
unit_registry.s + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) values_with_units = values * units if error is not None: with pytest.raises(error): - data_array.loc[values_with_units] - else: - result_array = array[values] - result_data_array = data_array.loc[values_with_units] - assert_equal_with_units(result_array, result_data_array) + ds.loc[{"x": values_with_units}] + + return + + expected = attach_units( + strip_units(ds).loc[{"x": values}], + {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s}, + ) + result = ds.loc[{"x": values_with_units}] + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( + reason="indexes strip units and head / tail / thin only support integers" + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + ( + method("head", x=7, y=3, z=6), + method("tail", x=7, y=3, z=6), + method("thin", x=7, y=3, z=6), + ), + ids=repr, + ) + def test_head_tail_thin(self, func, unit, error, dtype): + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.m, + } + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + kwargs = {name: value * unit for name, value in func.kwargs.items()} + + if error is not None: + with pytest.raises(error): + func(ds, **kwargs) + + return + + expected = attach_units(func(strip_units(ds)), extract_units(ds)) + result = func(ds, **kwargs) + + assert_equal_with_units(expected, result) - @pytest.mark.xfail(reason="tries to coerce using asarray") @pytest.mark.parametrize( "shape", ( @@ -1246,135 +3784,139 @@ def test_squeeze(self, shape, dtype): * (unit_registry.m if name != "t" else unit_registry.s) for name, length in zip(names, shape) } - array = np.arange(10 * 20).astype(dtype).reshape(shape) * unit_registry.J - data_array = xr.DataArray( - data=array, coords=coords, dims=tuple(names[: len(shape)]) + array1 = ( + np.linspace(0, 1, 10 * 20).astype(dtype).reshape(shape) * unit_registry.degK + ) + array2 = ( + np.linspace(1, 2, 10 * 20).astype(dtype).reshape(shape) * unit_registry.Pa ) - result_array = array.squeeze() - result_data_array = data_array.squeeze() - assert_equal_with_units(result_array, result_data_array) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=tuple(names[: len(shape)])), + "b": xr.DataArray(data=array2, dims=tuple(names[: len(shape)])), + }, + coords=coords, + ) + units = extract_units(ds) + + expected = attach_units(strip_units(ds).squeeze(), units) + + result = ds.squeeze() + assert_equal_with_units(result, expected) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) - for index, name in enumerate(names): - assert_equal_with_units( - np.squeeze(array, axis=index), data_array.squeeze(dim=name) - ) + for name 
in names: + expected = attach_units(strip_units(ds).squeeze(dim=name), units) + result = ds.squeeze(dim=name) + assert_equal_with_units(result, expected) + @pytest.mark.xfail(reason="ignores units") @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_interp(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - new_coords = (np.arange(10) + 0.5) * unit + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + coords = { "x": np.arange(10) * unit_registry.m, "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.s, } - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + new_coords = (np.arange(10) + 0.5) * unit if error is not None: with pytest.raises(error): - data_array.interp(x=new_coords) - else: - new_coords_ = ( - new_coords.magnitude if hasattr(new_coords, "magnitude") else new_coords - ) - result_array = strip_units(data_array).interp( - x=new_coords_ * unit_registry.degK - ) - result_data_array = data_array.interp(x=new_coords) + ds.interp(x=new_coords) - assert_equal_with_units(result_array, result_data_array) - - @pytest.mark.xfail(reason="tries to coerce using asarray") - @pytest.mark.parametrize( - "unit,error", - ( - pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), - ), - ) - def test_interp_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } + return - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - new_data_array = xr.DataArray( - data=np.empty((20, 10)), - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), + expected = attach_units( + strip_units(ds).interp(x=strip_units(new_coords)), extract_units(ds) ) + result = ds.interp(x=new_coords) - if error is not None: - with pytest.raises(error): - data_array.interp_like(new_data_array) - else: - result_array = ( - xr.DataArray( - data=array.magnitude, - coords={name: value.magnitude for name, value in coords.items()}, - dims=("x", "y"), - ).interp_like(strip_units(new_data_array)) - * unit_registry.degK - ) - result_data_array = data_array.interp_like(new_data_array) - - assert_equal_with_units(result_array, result_data_array) + assert_equal_with_units(result, expected) - @pytest.mark.xfail( - reason="pint does not implement np.result_type in __array_function__ yet" - ) + @pytest.mark.xfail(reason="ignores units") @pytest.mark.parametrize( "unit,error", ( - 
pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - new_coords = (np.arange(10) + 0.5) * unit + def test_interp_like(self, unit, error, dtype): + array1 = ( + np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) + coords = { "x": np.arange(10) * unit_registry.m, "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.m, } - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + other = xr.Dataset( + data_vars={ + "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")), + "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), + }, + coords={ + "x": (np.arange(20) + 0.3) * unit, + "y": (np.arange(10) - 0.2) * unit, + "z": (np.arange(15) + 0.4) * unit, + }, + ) if error is not None: with pytest.raises(error): - data_array.interp(x=new_coords) - else: - result_array = strip_units(data_array).reindex( - x=( - new_coords.magnitude - if hasattr(new_coords, "magnitude") - else new_coords - ) - * unit_registry.degK - ) - result_data_array = data_array.reindex(x=new_coords) + ds.interp_like(other) + + return - assert_equal_with_units(result_array, result_data_array) + expected = attach_units( + strip_units(ds).interp_like(strip_units(other)), extract_units(ds) + ) + result = ds.interp_like(other) + + assert_equal_with_units(result, expected) @pytest.mark.xfail( reason="pint does not implement np.result_type in __array_function__ yet" @@ -1382,139 +3924,116 @@ def test_reindex(self, unit, error): @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + def test_reindex(self, unit, error): + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.s, } - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - new_data_array = xr.DataArray( - data=np.empty((20, 10)), - coords={"x": 
np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, ) + new_coords = (np.arange(10) + 0.5) * unit + if error is not None: with pytest.raises(error): - data_array.reindex_like(new_data_array) - else: - expected = attach_units( - strip_units(data_array).reindex_like(strip_units(new_data_array)), - { - "data": unit_registry.degK, - "x": unit_registry.m, - "y": unit_registry.m, - }, - ) - result = data_array.reindex_like(new_data_array) - - assert_equal_with_units(expected, result) - - @pytest.mark.parametrize( - "func", - (method("unstack"), method("reset_index", "z"), method("reorder_levels")), - ids=repr, - ) - def test_stacking_stacked(self, func, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m - ) - x = np.arange(array.shape[0]) - y = np.arange(array.shape[1]) - - data_array = xr.DataArray( - name="data", data=array, coords={"x": x, "y": y}, dims=("x", "y") - ) - stacked = data_array.stack(z=("x", "y")) - - expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) - result = func(stacked) - - assert_equal_with_units(expected, result) - - @pytest.mark.xfail(reason="indexes strip the label units") - def test_to_unstacked_dataset(self, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) - * unit_registry.pascal - ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.s + ds.interp(x=new_coords) - data_array = xr.DataArray( - data=array, coords={"x": x, "y": y}, dims=("x", "y") - ).stack(z=("x", "y")) - - func = method("to_unstacked_dataset", dim="z") + return expected = attach_units( - func(strip_units(data_array)), - {"y": y.units, **dict(zip(x.magnitude, [array.units] * len(y)))}, - ).rename({elem.magnitude: elem for elem in x}) - result = func(data_array) - - print(data_array, expected, result, sep="\n") - - assert_equal_with_units(expected, result) + strip_units(ds).reindex(x=strip_units(new_coords)), extract_units(ds) + ) + result = ds.reindex(x=new_coords) - assert False + assert_equal_with_units(result, expected) + @pytest.mark.xfail( + reason="pint does not implement np.result_type in __array_function__ yet" + ) @pytest.mark.parametrize( - "func", + "unit,error", ( - method("transpose", "y", "x", "z"), - method("stack", a=("x", "y")), - method("set_index", x="x2"), - pytest.param( - method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") - ), + pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - method("roll", x=2, roll_coords=False), - marks=pytest.mark.xfail(reason="strips units"), + unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), - method("sortby", "x2"), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), - ids=repr, ) - def test_stacking_reordering(self, func, dtype): - array = ( - np.linspace(0, 10, 2 * 5 * 10).reshape(2, 5, 10).astype(dtype) - * unit_registry.m + def test_reindex_like(self, unit, error, dtype): + array1 = ( + np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa ) - x = np.arange(array.shape[0]) - y = np.arange(array.shape[1]) - z = 
np.arange(array.shape[2]) - x2 = np.linspace(0, 1, array.shape[0])[::-1] - data_array = xr.DataArray( - name="data", - data=array, - coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, - dims=("x", "y", "z"), + coords = { + "x": np.arange(10) * unit_registry.m, + "y": np.arange(5) * unit_registry.m, + "z": np.arange(8) * unit_registry.m, + } + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords=coords, + ) + + other = xr.Dataset( + data_vars={ + "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")), + "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), + }, + coords={ + "x": (np.arange(20) + 0.3) * unit, + "y": (np.arange(10) - 0.2) * unit, + "z": (np.arange(15) + 0.4) * unit, + }, ) + if error is not None: + with pytest.raises(error): + ds.reindex_like(other) + + return + expected = attach_units( - func(strip_units(data_array)), {"data": unit_registry.m} + strip_units(ds).reindex_like(strip_units(other)), extract_units(ds) ) - result = func(data_array) + result = ds.reindex_like(other) - assert_equal_with_units(expected, result) + assert_equal_with_units(result, expected) @pytest.mark.parametrize( "func", ( method("diff", dim="x"), method("differentiate", coord="x"), - method("integrate", dim="x"), + method("integrate", coord="x"), pytest.param( method("quantile", q=[0.25, 0.75]), marks=pytest.mark.xfail( @@ -1526,26 +4045,35 @@ def test_stacking_reordering(self, func, dtype): marks=pytest.mark.xfail(reason="strips units"), ), pytest.param( - lambda x: x.dot(x), - id="method_dot", - marks=pytest.mark.xfail(reason="pint does not implement einsum"), + method("map", np.fabs), + marks=pytest.mark.xfail(reason="fabs strips units"), ), ), ids=repr, ) def test_computation(self, func, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) + x = np.arange(10) * unit_registry.m + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "z")), + }, + coords={"x": x, "y": y, "z": z}, ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.s - - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) - units = extract_units(data_array) + units = extract_units(ds) - expected = attach_units(func(strip_units(data_array)), units) - result = func(data_array) + expected = attach_units(func(strip_units(ds)), units) + result = func(ds) assert_equal_with_units(expected, result) @@ -1553,51 +4081,76 @@ def test_computation(self, func, dtype): "func", ( pytest.param( - method("groupby", "y"), marks=pytest.mark.xfail(reason="strips units") + method("groupby", "x"), marks=pytest.mark.xfail(reason="strips units") ), pytest.param( - method("groupby_bins", "y", bins=4), + method("groupby_bins", "x", bins=4), marks=pytest.mark.xfail(reason="strips units"), ), - method("coarsen", y=2), + method("coarsen", x=2), pytest.param( - method("rolling", y=3), marks=pytest.mark.xfail(reason="strips units") + method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units") ), pytest.param( - method("rolling_exp", y=3), + method("rolling_exp", x=3), 
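+                # rolling_exp is backed by numbagg and, like rolling, currently strips units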
marks=pytest.mark.xfail(reason="strips units"), ), ), ids=repr, ) def test_computation_objects(self, func, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK ) + array2 = ( + np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(10) * unit_registry.m + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + }, + coords={"x": x, "y": y, "z": z}, + ) + units = extract_units(ds) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * 3 * unit_registry.s - - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) - units = extract_units(data_array) - - expected = attach_units(func(strip_units(data_array)).mean(), units) - result = func(data_array).mean() + args = [] if func.name != "groupby" else ["y"] + reduce_func = method("mean", *args) + expected = attach_units(reduce_func(func(strip_units(ds))), units) + result = reduce_func(func(ds)) assert_equal_with_units(expected, result) @pytest.mark.xfail(reason="strips units") def test_resample(self, dtype): - array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m - - time = pd.date_range("10-09-2010", periods=len(array), freq="1y") - data_array = xr.DataArray(data=array, coords={"time": time}, dims="time") - units = extract_units(data_array) + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) + t = pd.date_range("10-09-2010", periods=array1.shape[0], freq="1y") + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("time", "y")), + "b": xr.DataArray(data=array2, dims=("time", "z")), + }, + coords={"time": t, "y": y, "z": z}, + ) + units = extract_units(ds) func = method("resample", time="6m") - expected = attach_units(func(strip_units(data_array)).mean(), units) - result = func(data_array).mean() + expected = attach_units(func(strip_units(ds)).mean(), units) + result = func(ds).mean() assert_equal_with_units(expected, result) @@ -1605,30 +4158,173 @@ def test_resample(self, dtype): "func", ( pytest.param( - method("assign_coords", {"z": (["x"], np.arange(5) * unit_registry.s)}), + method("assign", c=lambda ds: 10 * ds.b), + marks=pytest.mark.xfail(reason="strips units"), + ), + pytest.param( + method("assign_coords", v=("x", np.arange(10) * unit_registry.s)), marks=pytest.mark.xfail(reason="strips units"), ), pytest.param(method("first")), pytest.param(method("last")), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="strips units"), + marks=pytest.mark.xfail( + reason="dataset groupby does not implement quantile" + ), ), ), ids=repr, ) def test_grouped_operations(self, func, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK ) + array2 = ( + np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) + * unit_registry.Pa + ) + x = np.arange(10) * unit_registry.m + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m 
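+        # "a" spans ("x", "y") and "b" spans ("x", "y", "z"), so grouping over "y" +        # below exercises both a 2d and a 3d quantity-backed variable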
+ + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + }, + coords={"x": x, "y": y, "z": z}, + ) + units = extract_units(ds) + units.update({"c": unit_registry.Pa, "v": unit_registry.s}) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * 3 * unit_registry.s + stripped_kwargs = { + name: strip_units(value) for name, value in func.kwargs.items() + } + expected = attach_units( + func(strip_units(ds).groupby("y"), **stripped_kwargs), units + ) + result = func(ds.groupby("y")) - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) - units = extract_units(data_array) + assert_equal_with_units(expected, result) - expected = attach_units(func(strip_units(data_array).groupby("y")), units) - result = func(data_array.groupby("y")) + @pytest.mark.parametrize( + "func", + ( + method("pipe", lambda ds: ds * 10), + method("assign", d=lambda ds: ds.b * 10), + method("assign_coords", y2=("y", np.arange(5) * unit_registry.mm)), + method("assign_attrs", attr1="value"), + method("rename", x2="x_mm"), + method("rename_vars", c="temperature"), + method("rename_dims", x="offset_x"), + method("swap_dims", {"x": "x2"}), + method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), + method("drop_sel", labels="x"), + method("drop_dims", "z"), + method("set_coords", names="c"), + method("reset_coords", names="x2"), + method("copy"), + ), + ids=repr, + ) + def test_content_manipulation(self, func, dtype): + array1 = ( + np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) + * unit_registry.m ** 3 + ) + array2 = ( + np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) + * unit_registry.Pa + ) + array3 = np.linspace(0, 10, 10).astype(dtype) * unit_registry.degK + + x = np.arange(10) * unit_registry.m + x2 = x.to(unit_registry.mm) + y = np.arange(5) * unit_registry.m + z = np.arange(8) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims=("x", "y")), + "b": xr.DataArray(data=array2, dims=("x", "y", "z")), + "c": xr.DataArray(data=array3, dims="x"), + }, + coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, + ) + units = extract_units(ds) + units.update( + { + "y2": unit_registry.mm, + "x_mm": unit_registry.mm, + "offset_x": unit_registry.m, + "d": unit_registry.Pa, + "temperature": unit_registry.degK, + } + ) + + stripped_kwargs = { + key: strip_units(value) for key, value in func.kwargs.items() + } + expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail(reason="blocked by reindex") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, xr.MergeError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, xr.MergeError, id="dimensionless" + ), + pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"), + pytest.param(unit_registry.cm, xr.MergeError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize("variant", ("data", "dims", "coords")) + def test_merge(self, variant, unit, error, dtype): + original_data_unit = unit_registry.m + original_dim_unit = unit_registry.m + original_coord_unit = unit_registry.m + + variants = { + "data": (unit, original_dim_unit, original_coord_unit), + "dims": (original_data_unit, unit, original_coord_unit), + "coords": (original_data_unit, original_dim_unit, unit), + } + 
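# exactly one of data / dims / coords carries the possibly incompatible unit +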
data_unit, dim_unit, coord_unit = variants.get(variant) + + left_array = np.arange(10).astype(dtype) * original_data_unit + right_array = np.arange(-5, 5).astype(dtype) * data_unit + + left_dim = np.arange(10, 20) * original_dim_unit + right_dim = np.arange(5, 15) * dim_unit + + left_coord = np.arange(-10, 0) * original_coord_unit + right_coord = np.arange(-15, -5) * coord_unit + + left = xr.Dataset( + data_vars={"a": ("x", left_array)}, + coords={"x": left_dim, "y": ("x", left_coord)}, + ) + right = xr.Dataset( + data_vars={"a": ("x", right_array)}, + coords={"x": right_dim, "y": ("x", right_coord)}, + ) + + units = extract_units(left) + + if error is not None: + with pytest.raises(error): + left.merge(right) + + return + + converted = convert_units(right, units) + expected = attach_units(strip_units(left).merge(strip_units(converted)), units) + result = left.merge(right) assert_equal_with_units(expected, result) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index e59453e64d8..cedf6b572a3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -542,6 +542,15 @@ def test_copy_index_with_data_errors(self): with raises_regex(ValueError, "must match shape of object"): orig.copy(data=new_data) + def test_replace(self): + var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) + result = var._replace() + assert_identical(result, var) + + new_data = np.arange(4).reshape(2, 2) + result = var._replace(data=new_data) + assert_array_equal(result.data, new_data) + def test_real_and_imag(self): v = self.cls("x", np.arange(3) - 1j * np.arange(3), {"foo": "bar"}) expected_re = self.cls("x", np.arange(3), {"foo": "bar"}) @@ -1477,6 +1486,10 @@ def test_reduce(self): with raises_regex(ValueError, "cannot supply both"): v.mean(dim="x", axis=0) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=True) + with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): + v.mean(dim="x", allow_lazy=False) def test_quantile(self): v = Variable(["x", "y"], self.d)