Skip to content
forked from pydata/xarray

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into fix/user-coordinates
Browse files Browse the repository at this point in the history
* upstream/master:
  Allow appending datetime & boolean variables to zarr stores (pydata#3504)
  warn if dim is passed to rolling operations. (pydata#3513)
  Deprecate allow_lazy (pydata#3435)
  Recursive tokenization (pydata#3515)
  format indexing.rst code with black (pydata#3511)
  • Loading branch information
dcherian committed Nov 14, 2019
2 parents 1a42c6c + 40588dc commit e1a3823
Show file tree
Hide file tree
Showing 14 changed files with 208 additions and 82 deletions.
138 changes: 82 additions & 56 deletions doc/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,20 +209,23 @@ simultaneously, returning a new dataset:

.. ipython:: python
da = xr.DataArray(np.random.rand(4, 3),
[('time', pd.date_range('2000-01-01', periods=4)),
('space', ['IA', 'IL', 'IN'])])
ds = da.to_dataset(name='foo')
da = xr.DataArray(
np.random.rand(4, 3),
[
("time", pd.date_range("2000-01-01", periods=4)),
("space", ["IA", "IL", "IN"]),
],
)
ds = da.to_dataset(name="foo")
ds.isel(space=[0], time=[0])
ds.sel(time='2000-01-01')
ds.sel(time="2000-01-01")
Positional indexing on a dataset is not supported because the ordering of
dimensions in a dataset is somewhat ambiguous (it can vary between different
arrays). However, you can do normal indexing with dimension names:

.. ipython:: python
ds[dict(space=[0], time=[0])]
ds.loc[dict(time='2000-01-01')]
Expand All @@ -248,7 +251,6 @@ Any variables with these dimensions are also dropped:
ds.drop_dims('time')
.. _masking with where:

Masking with ``where``
Expand Down Expand Up @@ -326,8 +328,12 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper:

.. ipython:: python
da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'],
coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']})
da = xr.DataArray(
np.arange(12).reshape((3, 4)),
dims=["x", "y"],
coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]},
)
da
da[[0, 1], [1, 1]]
Expand Down Expand Up @@ -410,43 +416,56 @@ can use indexing with ``.loc`` :

.. ipython:: python
ds = xr.tutorial.open_dataset('air_temperature')
ds = xr.tutorial.open_dataset("air_temperature")
#add an empty 2D dataarray
ds['empty']= xr.full_like(ds.air.mean('time'),fill_value=0)
# add an empty 2D dataarray
ds["empty"] = xr.full_like(ds.air.mean("time"), fill_value=0)
#modify one grid point using loc()
ds['empty'].loc[dict(lon=260, lat=30)] = 100
# modify one grid point using loc()
ds["empty"].loc[dict(lon=260, lat=30)] = 100
#modify a 2D region using loc()
lc = ds.coords['lon']
la = ds.coords['lat']
ds['empty'].loc[dict(lon=lc[(lc>220)&(lc<260)], lat=la[(la>20)&(la<60)])] = 100
# modify a 2D region using loc()
lc = ds.coords["lon"]
la = ds.coords["lat"]
ds["empty"].loc[
dict(lon=lc[(lc > 220) & (lc < 260)], lat=la[(la > 20) & (la < 60)])
] = 100
or :py:func:`~xarray.where`:

.. ipython:: python
#modify one grid point using xr.where()
ds['empty'] = xr.where((ds.coords['lat']==20)&(ds.coords['lon']==260), 100, ds['empty'])
# modify one grid point using xr.where()
ds["empty"] = xr.where(
(ds.coords["lat"] == 20) & (ds.coords["lon"] == 260), 100, ds["empty"]
)
# or modify a 2D region using xr.where()
mask = (
(ds.coords["lat"] > 20)
& (ds.coords["lat"] < 60)
& (ds.coords["lon"] > 220)
& (ds.coords["lon"] < 260)
)
ds["empty"] = xr.where(mask, 100, ds["empty"])
#or modify a 2D region using xr.where()
mask = (ds.coords['lat']>20)&(ds.coords['lat']<60)&(ds.coords['lon']>220)&(ds.coords['lon']<260)
ds['empty'] = xr.where(mask, 100, ds['empty'])
Vectorized indexing can also be used to assign values to an xarray object.

.. ipython:: python
da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'],
coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']})
da = xr.DataArray(
np.arange(12).reshape((3, 4)),
dims=["x", "y"],
coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]},
)
da
da[0] = -1 # assignment with broadcasting
da
ind_x = xr.DataArray([0, 1], dims=['x'])
ind_y = xr.DataArray([0, 1], dims=['y'])
ind_x = xr.DataArray([0, 1], dims=["x"])
ind_y = xr.DataArray([0, 1], dims=["y"])
da[ind_x, ind_y] = -2 # assign -2 to (ix, iy) = (0, 0) and (1, 1)
da
Expand Down Expand Up @@ -508,10 +527,10 @@ flexible indexing. The following is an example of the pointwise indexing:

.. ipython:: python
da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y'])
da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"])
da
da.isel(x=xr.DataArray([0, 1, 6], dims='z'),
y=xr.DataArray([0, 1, 0], dims='z'))
da.isel(x=xr.DataArray([0, 1, 6], dims="z"), y=xr.DataArray([0, 1, 0], dims="z"))
where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected
and mapped along a new dimension ``z``.
Expand All @@ -521,23 +540,27 @@ you can supply a :py:class:`~xarray.DataArray` with a coordinate,

.. ipython:: python
da.isel(x=xr.DataArray([0, 1, 6], dims='z',
coords={'z': ['a', 'b', 'c']}),
y=xr.DataArray([0, 1, 0], dims='z'))
da.isel(
x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}),
y=xr.DataArray([0, 1, 0], dims="z"),
)
Analogously, label-based pointwise-indexing is also possible by the ``.sel``
method:

.. ipython:: python
da = xr.DataArray(np.random.rand(4, 3),
[('time', pd.date_range('2000-01-01', periods=4)),
('space', ['IA', 'IL', 'IN'])])
times = xr.DataArray(pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']),
dims='new_time')
da.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']),
time=times)
da = xr.DataArray(
np.random.rand(4, 3),
[
("time", pd.date_range("2000-01-01", periods=4)),
("space", ["IA", "IL", "IN"]),
],
)
times = xr.DataArray(
pd.to_datetime(["2000-01-03", "2000-01-02", "2000-01-01"]), dims="new_time"
)
da.sel(space=xr.DataArray(["IA", "IL", "IN"], dims=["new_time"]), time=times)
.. _align and reindex:

Expand Down Expand Up @@ -635,12 +658,16 @@ through the :py:attr:`~xarray.DataArray.indexes` attribute.

.. ipython:: python
da = xr.DataArray(np.random.rand(4, 3),
[('time', pd.date_range('2000-01-01', periods=4)),
('space', ['IA', 'IL', 'IN'])])
da = xr.DataArray(
np.random.rand(4, 3),
[
("time", pd.date_range("2000-01-01", periods=4)),
("space", ["IA", "IL", "IN"]),
],
)
da
da.indexes
da.indexes['time']
da.indexes["time"]
Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension,
falling back to a default :py:class:`pandas.RangeIndex` if it has no coordinate
Expand Down Expand Up @@ -694,32 +721,31 @@ pandas:

.. ipython:: python
midx = pd.MultiIndex.from_product([list('abc'), [0, 1]],
names=('one', 'two'))
mda = xr.DataArray(np.random.rand(6, 3),
[('x', midx), ('y', range(3))])
mda
mda.sel(x=(list('ab'), [0]))
midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two"))
mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))])
mda
mda.sel(x=(list("ab"), [0]))
You can also select multiple elements by providing a list of labels or tuples or
a slice of tuples:

.. ipython:: python
mda.sel(x=[('a', 0), ('b', 1)])
mda.sel(x=[('a', 0), ('b', 1)])
Additionally, xarray supports dictionaries:

.. ipython:: python
mda.sel(x={'one': 'a', 'two': 0})
mda.sel(x={'one': 'a', 'two': 0})
For convenience, ``sel`` also accepts multi-index levels directly
as keyword arguments:

.. ipython:: python
mda.sel(one='a', two=0)
mda.sel(one='a', two=0)
Note that using ``sel`` it is not possible to mix a dimension
indexer with level indexers for that dimension
Expand All @@ -731,7 +757,7 @@ multi-index is reduced to a single index.

.. ipython:: python
mda.loc[{'one': 'a'}, ...]
mda.loc[{'one': 'a'}, ...]
Unlike pandas, xarray does not guess whether you provide index levels or
dimensions when using ``loc`` in some ambiguous cases. For example, for
Expand Down
10 changes: 10 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ New Features
deterministic hashing in previous releases; this change implements it when whole
xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is
invoked. (:issue:`3378`, :pull:`3446`)
xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is
invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
By `Deepak Cherian <https://github.com/dcherian>`_ and
`Guido Imperiale <https://github.com/crusaderky>`_.
- xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk.
Expand All @@ -91,9 +93,14 @@ Bug fixes
By `Deepak Cherian <https://github.com/dcherian>`_.
- Sync with cftime by removing ``dayofwk=-1`` for cftime>=1.0.4.
By `Anderson Banihirwe <https://github.com/andersy005>`_.
- Rolling reduction operations no longer compute dask arrays by default (:issue:`3161`).
In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated.
By `Deepak Cherian <https://github.com/dcherian>`_.
- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and
:py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions.
(:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_.
- Allow appending datetime and bool data variables to zarr stores.
(:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.

Documentation
~~~~~~~~~~~~~
Expand Down Expand Up @@ -220,6 +227,9 @@ Bug fixes
By `Deepak Cherian <https://github.com/dcherian>`_.
- Fix error in concatenating unlabeled dimensions (:pull:`3362`).
By `Deepak Cherian <https://github.com/dcherian/>`_.
- Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is
specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created.
(:pull:`3513`). By `Deepak Cherian <https://github.com/dcherian/>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
7 changes: 5 additions & 2 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,15 +1234,18 @@ def _validate_datatypes_for_zarr_append(dataset):
def check_dtype(var):
if (
not np.issubdtype(var.dtype, np.number)
and not np.issubdtype(var.dtype, np.datetime64)
and not np.issubdtype(var.dtype, np.bool)
and not coding.strings.is_unicode_dtype(var.dtype)
and not var.dtype == object
):
# and not re.match('^bytes[1-9]+$', var.dtype.name)):
raise ValueError(
"Invalid dtype for data variable: {} "
"dtype must be a subtype of number, "
"a fixed sized string, a fixed size "
"unicode string or an object".format(var)
"datetime, bool, a fixed sized string, "
"a fixed size unicode string or an "
"object".format(var)
)

for k in dataset.data_vars.values():
Expand Down
17 changes: 4 additions & 13 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,12 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
if include_skipna:

def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs):
return self.reduce(
func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs
)
return self.reduce(func, dim, axis, skipna=skipna, **kwargs)

else:

def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore
return self.reduce(func, dim, axis, allow_lazy=True, **kwargs)
return self.reduce(func, dim, axis, **kwargs)

return wrapped_func

Expand Down Expand Up @@ -83,20 +81,13 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool

def wrapped_func(self, dim=None, skipna=None, **kwargs):
return self.reduce(
func,
dim,
skipna=skipna,
numeric_only=numeric_only,
allow_lazy=True,
**kwargs,
func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs
)

else:

def wrapped_func(self, dim=None, **kwargs): # type: ignore
return self.reduce(
func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs
)
return self.reduce(func, dim, numeric_only=numeric_only, **kwargs)

return wrapped_func

Expand Down
4 changes: 3 additions & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,9 @@ def reset_coords(
return dataset

def __dask_tokenize__(self):
return (type(self), self._variable, self._coords, self._name)
from dask.base import normalize_token

return normalize_token((type(self), self._variable, self._coords, self._name))

def __dask_graph__(self):
return self._to_temp_dataset().__dask_graph__()
Expand Down
8 changes: 6 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,11 @@ def load(self, **kwargs) -> "Dataset":
return self

def __dask_tokenize__(self):
return (type(self), self._variables, self._coord_names, self._attrs)
from dask.base import normalize_token

return normalize_token(
(type(self), self._variables, self._coord_names, self._attrs)
)

def __dask_graph__(self):
graphs = {k: v.__dask_graph__() for k, v in self.variables.items()}
Expand Down Expand Up @@ -4027,7 +4031,7 @@ def reduce(
keep_attrs: bool = None,
keepdims: bool = False,
numeric_only: bool = False,
allow_lazy: bool = False,
allow_lazy: bool = None,
**kwargs: Any,
) -> "Dataset":
"""Reduce this dataset by applying `func` along some dimension(s).
Expand Down
4 changes: 1 addition & 3 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,9 +585,7 @@ def _first_or_last(self, op, skipna, keep_attrs):
return self._obj
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=True)
return self.reduce(
op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True
)
return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs)

def first(self, skipna=None, keep_attrs=None):
"""Return the first element of each group along the group dimension
Expand Down
Loading

0 comments on commit e1a3823

Please sign in to comment.