Merge remote-tracking branch 'upstream/master' into fix/user-coordinates

* upstream/master:
  Allow appending datetime & boolean variables to zarr stores (pydata#3504)
  warn if dim is passed to rolling operations. (pydata#3513)
  Deprecate allow_lazy (pydata#3435)
  Recursive tokenization (pydata#3515)
  format indexing.rst code with black (pydata#3511)
dcherian · Nov 14, 2019 · e1a3823 · e1a3823
2 parents 1a42c6c + 40588dc
commit e1a3823
Show file tree

Hide file tree

Showing 14 changed files with 208 additions and 82 deletions.
diff --git a/doc/indexing.rst b/doc/indexing.rst
@@ -209,20 +209,23 @@ simultaneously, returning a new dataset:
 
 .. ipython:: python
 
-    da = xr.DataArray(np.random.rand(4, 3),
-                      [('time', pd.date_range('2000-01-01', periods=4)),
-                       ('space', ['IA', 'IL', 'IN'])])
-    ds = da.to_dataset(name='foo')
+    da = xr.DataArray(
+        np.random.rand(4, 3),
+        [
+            ("time", pd.date_range("2000-01-01", periods=4)),
+            ("space", ["IA", "IL", "IN"]),
+        ],
+    )
+    ds = da.to_dataset(name="foo")
     ds.isel(space=[0], time=[0])
-    ds.sel(time='2000-01-01')
+    ds.sel(time="2000-01-01")
 
 Positional indexing on a dataset is not supported because the ordering of
 dimensions in a dataset is somewhat ambiguous (it can vary between different
 arrays). However, you can do normal indexing with dimension names:
 
 .. ipython:: python
 
-
     ds[dict(space=[0], time=[0])]
     ds.loc[dict(time='2000-01-01')]
 
@@ -248,7 +251,6 @@ Any variables with these dimensions are also dropped:
 
     ds.drop_dims('time')
 
-
 .. _masking with where:
 
 Masking with ``where``
@@ -326,8 +328,12 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper:
 
 .. ipython:: python
 
-    da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'],
-                      coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']})
+
+    da = xr.DataArray(
+        np.arange(12).reshape((3, 4)),
+        dims=["x", "y"],
+        coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]},
+    )
     da
     da[[0, 1], [1, 1]]
 
@@ -410,43 +416,56 @@ can use indexing with ``.loc`` :
 
 .. ipython:: python
 
-    ds = xr.tutorial.open_dataset('air_temperature')
+    ds = xr.tutorial.open_dataset("air_temperature")
 
-    #add an empty 2D dataarray
-    ds['empty']= xr.full_like(ds.air.mean('time'),fill_value=0)
+    # add an empty 2D dataarray
+    ds["empty"] = xr.full_like(ds.air.mean("time"), fill_value=0)
 
-    #modify one grid point using loc()
-    ds['empty'].loc[dict(lon=260, lat=30)] = 100
+    # modify one grid point using loc()
+    ds["empty"].loc[dict(lon=260, lat=30)] = 100
 
-    #modify a 2D region using loc()
-    lc = ds.coords['lon']
-    la = ds.coords['lat']
-    ds['empty'].loc[dict(lon=lc[(lc>220)&(lc<260)], lat=la[(la>20)&(la<60)])] = 100
+    # modify a 2D region using loc()
+    lc = ds.coords["lon"]
+    la = ds.coords["lat"]
+    ds["empty"].loc[
+        dict(lon=lc[(lc > 220) & (lc < 260)], lat=la[(la > 20) & (la < 60)])
+    ] = 100
 
 or :py:meth:`~xarray.where`:
 
 .. ipython:: python
 
-    #modify one grid point using xr.where()
-    ds['empty'] = xr.where((ds.coords['lat']==20)&(ds.coords['lon']==260), 100, ds['empty'])
+    # modify one grid point using xr.where()
+    ds["empty"] = xr.where(
+        (ds.coords["lat"] == 20) & (ds.coords["lon"] == 260), 100, ds["empty"]
+    )
+
+    # or modify a 2D region using xr.where()
+    mask = (
+        (ds.coords["lat"] > 20)
+        & (ds.coords["lat"] < 60)
+        & (ds.coords["lon"] > 220)
+        & (ds.coords["lon"] < 260)
+    )
+    ds["empty"] = xr.where(mask, 100, ds["empty"])
 
-    #or modify a 2D region using xr.where()
-    mask = (ds.coords['lat']>20)&(ds.coords['lat']<60)&(ds.coords['lon']>220)&(ds.coords['lon']<260)
-    ds['empty'] = xr.where(mask, 100, ds['empty'])
 
 
 Vectorized indexing can also be used to assign values to xarray object.
 
 .. ipython:: python
 
-    da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'],
-                      coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']})
+    da = xr.DataArray(
+        np.arange(12).reshape((3, 4)),
+        dims=["x", "y"],
+        coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]},
+    )
     da
     da[0] = -1  # assignment with broadcasting
     da
 
-    ind_x = xr.DataArray([0, 1], dims=['x'])
-    ind_y = xr.DataArray([0, 1], dims=['y'])
+    ind_x = xr.DataArray([0, 1], dims=["x"])
+    ind_y = xr.DataArray([0, 1], dims=["y"])
     da[ind_x, ind_y] = -2  # assign -2 to (ix, iy) = (0, 0) and (1, 1)
     da
 
@@ -508,10 +527,10 @@ flexible indexing. The following is an example of the pointwise indexing:
 
 .. ipython:: python
 
-    da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y'])
+    da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"])
     da
-    da.isel(x=xr.DataArray([0, 1, 6], dims='z'),
-            y=xr.DataArray([0, 1, 0], dims='z'))
+    da.isel(x=xr.DataArray([0, 1, 6], dims="z"), y=xr.DataArray([0, 1, 0], dims="z"))
+
 
 where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected
 and mapped along a new dimension ``z``.
@@ -521,23 +540,27 @@ you can supply a :py:class:`~xarray.DataArray` with a coordinate,
 
 .. ipython:: python
 
-    da.isel(x=xr.DataArray([0, 1, 6], dims='z',
-                           coords={'z': ['a', 'b', 'c']}),
-            y=xr.DataArray([0, 1, 0], dims='z'))
-
+    da.isel(
+        x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}),
+        y=xr.DataArray([0, 1, 0], dims="z"),
+    )
+    
 Analogously, label-based pointwise-indexing is also possible by the ``.sel``
 method:
 
 .. ipython:: python
 
-    da = xr.DataArray(np.random.rand(4, 3),
-                      [('time', pd.date_range('2000-01-01', periods=4)),
-                       ('space', ['IA', 'IL', 'IN'])])
-    times = xr.DataArray(pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']),
-                         dims='new_time')
-    da.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']),
-           time=times)
-
+    da = xr.DataArray(
+        np.random.rand(4, 3),
+        [
+            ("time", pd.date_range("2000-01-01", periods=4)),
+            ("space", ["IA", "IL", "IN"]),
+        ],
+    )
+    times = xr.DataArray(
+        pd.to_datetime(["2000-01-03", "2000-01-02", "2000-01-01"]), dims="new_time"
+    )
+    da.sel(space=xr.DataArray(["IA", "IL", "IN"], dims=["new_time"]), time=times)
 
 .. _align and reindex:
 
@@ -635,12 +658,16 @@ through the :py:attr:`~xarray.DataArray.indexes` attribute.
 
 .. ipython:: python
 
-    da = xr.DataArray(np.random.rand(4, 3),
-                      [('time', pd.date_range('2000-01-01', periods=4)),
-                       ('space', ['IA', 'IL', 'IN'])])
+    da = xr.DataArray(
+        np.random.rand(4, 3),
+        [
+            ("time", pd.date_range("2000-01-01", periods=4)),
+            ("space", ["IA", "IL", "IN"]),
+        ],
+    )
     da
     da.indexes
-    da.indexes['time']
+    da.indexes["time"]
 
 Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension,
 falling back to a default :py:class:`pandas.RangeIndex` if it has no coordinate
@@ -694,32 +721,31 @@ pandas:
 
 .. ipython:: python
 
-  midx = pd.MultiIndex.from_product([list('abc'), [0, 1]],
-                                    names=('one', 'two'))
-  mda = xr.DataArray(np.random.rand(6, 3),
-                     [('x', midx), ('y', range(3))])
-  mda
-  mda.sel(x=(list('ab'), [0]))
+
+    midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two"))
+    mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))])
+    mda
+    mda.sel(x=(list("ab"), [0]))
 
 You can also select multiple elements by providing a list of labels or tuples or
 a slice of tuples:
 
 .. ipython:: python
 
-  mda.sel(x=[('a', 0), ('b', 1)])
+    mda.sel(x=[('a', 0), ('b', 1)])
 
 Additionally, xarray supports dictionaries:
 
 .. ipython:: python
 
-  mda.sel(x={'one': 'a', 'two': 0})
+    mda.sel(x={'one': 'a', 'two': 0})
 
 For convenience, ``sel`` also accepts multi-index levels directly
 as keyword arguments:
 
 .. ipython:: python
 
-  mda.sel(one='a', two=0)
+    mda.sel(one='a', two=0)
 
 Note that using ``sel`` it is not possible to mix a dimension
 indexer with level indexers for that dimension
@@ -731,7 +757,7 @@ multi-index is reduced to a single index.
 
 .. ipython:: python
 
-  mda.loc[{'one': 'a'}, ...]
+    mda.loc[{'one': 'a'}, ...]
 
 Unlike pandas, xarray does not guess whether you provide index levels or
 dimensions when using ``loc`` in some ambiguous cases. For example, for

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -74,6 +74,6 @@ New Features
   deterministic hashing in previous releases; this change implements it when whole
   xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is
-  invoked. (:issue:`3378`, :pull:`3446`)
+  invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
   By `Deepak Cherian <https://github.com/dcherian>`_ and
   `Guido Imperiale <https://github.com/crusaderky>`_.
 - xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk.
@@ -91,9 +93,14 @@ Bug fixes
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4.
   By `Anderson Banihirwe <https://github.com/andersy005>`_.
+- Rolling reduction operations no longer compute dask arrays by default (:issue:`3161`).
+  In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated.
+  By `Deepak Cherian <https://github.com/dcherian>`_.
 - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and
   :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions.
   (:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_
+- Allow appending datetime and bool data variables to zarr stores.
+  (:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.
 
 Documentation
 ~~~~~~~~~~~~~
@@ -220,6 +227,9 @@ Bug fixes
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - Fix error in concatenating unlabeled dimensions (:pull:`3362`).
   By `Deepak Cherian <https://github.com/dcherian/>`_.
+- Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is
+  specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created.
+  (:pull:`3513`). By `Deepak Cherian <https://github.com/dcherian/>`_.
 
 Documentation
 ~~~~~~~~~~~~~

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
@@ -1234,15 +1234,18 @@ def _validate_datatypes_for_zarr_append(dataset):
     def check_dtype(var):
         if (
             not np.issubdtype(var.dtype, np.number)
+            and not np.issubdtype(var.dtype, np.datetime64)
+            and not np.issubdtype(var.dtype, np.bool)
             and not coding.strings.is_unicode_dtype(var.dtype)
             and not var.dtype == object
         ):
             # and not re.match('^bytes[1-9]+$', var.dtype.name)):
             raise ValueError(
                 "Invalid dtype for data variable: {} "
                 "dtype must be a subtype of number, "
-                "a fixed sized string, a fixed size "
-                "unicode string or an object".format(var)
+                "datetime, bool, a fixed sized string, "
+                "a fixed size unicode string or an "
+                "object".format(var)
             )
 
     for k in dataset.data_vars.values():

diff --git a/xarray/core/common.py b/xarray/core/common.py
@@ -43,14 +43,12 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
         if include_skipna:
 
             def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs):
-                return self.reduce(
-                    func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs
-                )
+                return self.reduce(func, dim, axis, skipna=skipna, **kwargs)
 
         else:
 
             def wrapped_func(self, dim=None, axis=None, **kwargs):  # type: ignore
-                return self.reduce(func, dim, axis, allow_lazy=True, **kwargs)
+                return self.reduce(func, dim, axis, **kwargs)
 
         return wrapped_func
 
@@ -83,20 +81,13 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
 
             def wrapped_func(self, dim=None, skipna=None, **kwargs):
                 return self.reduce(
-                    func,
-                    dim,
-                    skipna=skipna,
-                    numeric_only=numeric_only,
-                    allow_lazy=True,
-                    **kwargs,
+                    func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs
                 )
 
         else:
 
             def wrapped_func(self, dim=None, **kwargs):  # type: ignore
-                return self.reduce(
-                    func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs
-                )
+                return self.reduce(func, dim, numeric_only=numeric_only, **kwargs)
 
         return wrapped_func
 

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -755,7 +755,9 @@ def reset_coords(
             return dataset
 
     def __dask_tokenize__(self):
-        return (type(self), self._variable, self._coords, self._name)
+        from dask.base import normalize_token
+
+        return normalize_token((type(self), self._variable, self._coords, self._name))
 
     def __dask_graph__(self):
         return self._to_temp_dataset().__dask_graph__()

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -652,7 +652,11 @@ def load(self, **kwargs) -> "Dataset":
         return self
 
     def __dask_tokenize__(self):
-        return (type(self), self._variables, self._coord_names, self._attrs)
+        from dask.base import normalize_token
+
+        return normalize_token(
+            (type(self), self._variables, self._coord_names, self._attrs)
+        )
 
     def __dask_graph__(self):
         graphs = {k: v.__dask_graph__() for k, v in self.variables.items()}
@@ -4027,7 +4031,7 @@ def reduce(
         keep_attrs: bool = None,
         keepdims: bool = False,
         numeric_only: bool = False,
-        allow_lazy: bool = False,
+        allow_lazy: bool = None,
         **kwargs: Any,
     ) -> "Dataset":
         """Reduce this dataset by applying `func` along some dimension(s).

diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
@@ -585,9 +585,7 @@ def _first_or_last(self, op, skipna, keep_attrs):
             return self._obj
         if keep_attrs is None:
             keep_attrs = _get_keep_attrs(default=True)
-        return self.reduce(
-            op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True
-        )
+        return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs)
 
     def first(self, skipna=None, keep_attrs=None):
         """Return the first element of each group along the group dimension