From 1eefcdcdf0c18745b7858331af8e89bde8626b65 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 31 Jan 2020 17:10:01 +0000 Subject: [PATCH 01/24] Fix RTD build (#3737) * pin some requirements to reduce conda's memory usage * remove python section in readthedocs.yml --- ci/requirements/doc.yml | 21 +++++++++++---------- readthedocs.yml | 4 ---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index a8b72dc0956..16cce5782e0 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -6,20 +6,21 @@ dependencies: - python=3.7 - bottleneck - cartopy - - cfgrib - - h5netcdf + - cfgrib>=0.9 + - dask>=2.10 + - h5netcdf>=0.7.4 - ipykernel - ipython - - iris + - iris>=2.3 - jupyter_client - nbsphinx - - netcdf4 + - netcdf4>=1.5 - numba - - numpy + - numpy>=1.17 - numpydoc - - pandas - - rasterio + - pandas>=1.0 + - rasterio>=1.1 - seaborn - - sphinx - - sphinx_rtd_theme - - zarr + - sphinx>=2.3 + - sphinx_rtd_theme>=0.4 + - zarr>=2.4 diff --git a/readthedocs.yml b/readthedocs.yml index 9ed8d28eaf2..88aee82a44b 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -6,8 +6,4 @@ build: conda: environment: ci/requirements/doc.yml -python: - version: 3.7 - install: [] - formats: [] From 58b11a63732e3066ad38dc1e63a733f4cce6425f Mon Sep 17 00:00:00 2001 From: Graham Inggs Date: Sun, 23 Feb 2020 21:39:39 +0200 Subject: [PATCH 02/24] Let test_repr_of_dataset pass on big-endian systems (#3772) --- xarray/tests/test_formatting_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index fea24ff93f8..01357000b20 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -130,5 +130,5 @@ def test_repr_of_dataset(dataset): assert ( formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3 ) - assert "<U4" in formatted + assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted From 016a77d626338efc5a59fc50b7d82d153144d086 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 29 Feb 2020 15:47:11 -0500 Subject: [PATCH 03/24] raise on multiple string args to groupby (#3802) --- doc/whats-new.rst | 6 +++++- xarray/core/common.py | 11 +++++++++++ xarray/tests/test_groupby.py | 5 +++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6a5491e34dd..1deb77eecfc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,11 @@ New Features By `Kai Mühlbauer `_. - implement pint support. (:issue:`3594`, :pull:`3706`) By `Justus Magin `_. - +- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a + `TypeError` on multiple string arguments. Receiving multiple string arguments + often means a user is attempting to pass multiple dimensions to group over + and should instead pass a list. 
+ By `Maximilian Roos `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index e908c69dd14..582ae310061 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -660,6 +660,17 @@ def groupby(self, group, squeeze: bool = True, restore_coord_dims: bool = None): core.groupby.DataArrayGroupBy core.groupby.DatasetGroupBy """ + # While we don't generally check the type of every arg, passing + # multiple dimensions as multiple arguments is common enough, and the + # consequences hidden enough (strings evaluate as true) to warrant + # checking here. + # A future version could make squeeze kwarg only, but would face + # backward-compat issues. + if not isinstance(squeeze, bool): + raise TypeError( + f"`squeeze` must be True or False, but {squeeze} was supplied" + ) + return self._groupby_cls( self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims ) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 97bd31ae050..77558e741be 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -483,6 +483,11 @@ def test_groupby_reduce_dimension_error(array): assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"])) +def test_groupby_multiple_string_args(array): + with pytest.raises(TypeError): + array.groupby("x", "y") + + def test_groupby_bins_timeseries(): ds = xr.Dataset() ds["time"] = xr.DataArray( From 45d88fc4b2524ecb0c1236cd31767d00f72b0ea1 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 2 Mar 2020 04:41:19 -0500 Subject: [PATCH 04/24] Enable pandas-style rounding of cftime.datetime objects (#3792) * Initial progress on implementing cftime floor/ceil/round * Improve tests and docstrings * Add tests of rounding cftime datetimes via dt accessor * Add documentation * docstring edits * Test rounding raises error with non-fixed frequency * black * typo * A couple cleanup items: - Fix floating point issue in asi8 and add tests - Ensure dask only computes once when using the rounding accessors * black --- doc/weather-climate.rst | 8 ++ doc/whats-new.rst | 4 + xarray/coding/cftimeindex.py | 135 +++++++++++++++++++++++++++++++ xarray/core/accessor_dt.py | 28 ++++--- xarray/tests/test_accessor_dt.py | 104 ++++++++++++++++++++++++ xarray/tests/test_cftimeindex.py | 89 ++++++++++++++++++++ 6 files changed, 359 insertions(+), 9 deletions(-) diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 96641c2b97e..9e7c0f1d51d 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -105,6 +105,14 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.time.dt.dayofyear da.time.dt.dayofweek +- Rounding of datetimes to fixed frequencies via the ``dt`` accessor: + +.. ipython:: python + + da.time.dt.ceil('3D') + da.time.dt.floor('5D') + da.time.dt.round('2D') + - Group-by operations based on datetime accessor attributes (e.g. by month of the year): diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1deb77eecfc..579719cb8d7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,10 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added support for :py:class:`pandas.DatetimeIndex`-style rounding of + ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the + :py:class:`~core.accessor_dt.DatetimeAccessor`. + By `Spencer Clark `_ - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. 
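Before the implementation diff, a minimal sketch of the rounding API this patch adds (requires cftime; the dates and frequencies are illustrative):

    import xarray as xr

    # A small non-standard-calendar time axis (requires the cftime package).
    times = xr.cftime_range("2000-01-01T03:00", periods=4, freq="11H", calendar="noleap")
    da = xr.DataArray(range(4), coords=[times], dims="time")

    # Pandas-style rounding via the dt accessor ...
    da.time.dt.floor("D")
    da.time.dt.ceil("D")
    da.time.dt.round("2D")

    # ... or directly on the CFTimeIndex itself.
    times.floor("12H")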
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 8b440812ca9..99f90430e91 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -528,6 +528,83 @@ def strftime(self, date_format): """ return pd.Index([date.strftime(date_format) for date in self._data]) + @property + def asi8(self): + """Convert to integers with units of microseconds since 1970-01-01.""" + from ..core.resample_cftime import exact_cftime_datetime_difference + + epoch = self.date_type(1970, 1, 1) + return np.array( + [ + _total_microseconds(exact_cftime_datetime_difference(epoch, date)) + for date in self.values + ] + ) + + def _round_via_method(self, freq, method): + """Round dates using a specified method.""" + from .cftime_offsets import CFTIME_TICKS, to_offset + + offset = to_offset(freq) + if not isinstance(offset, CFTIME_TICKS): + raise ValueError(f"{offset} is a non-fixed frequency") + + unit = _total_microseconds(offset.as_timedelta()) + values = self.asi8 + rounded = method(values, unit) + return _cftimeindex_from_i8(rounded, self.date_type, self.name) + + def floor(self, freq): + """Round dates down to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _floor_int) + + def ceil(self, freq): + """Round dates up to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _ceil_int) + + def round(self, freq): + """Round dates to a fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _round_to_nearest_half_even) + def _parse_iso8601_without_reso(date_type, datetime_str): date, _ = _parse_iso8601_with_reso(date_type, datetime_str) @@ -554,3 +631,61 @@ def _parse_array_of_cftime_strings(strings, date_type): return np.array( [_parse_iso8601_without_reso(date_type, s) for s in strings.ravel()] ).reshape(strings.shape) + + +def _cftimeindex_from_i8(values, date_type, name): + """Construct a CFTimeIndex from an array of integers. + + Parameters + ---------- + values : np.array + Integers representing microseconds since 1970-01-01. + date_type : cftime.datetime + Type of date for the index. + name : str + Name of the index. + + Returns + ------- + CFTimeIndex + """ + epoch = date_type(1970, 1, 1) + dates = np.array([epoch + timedelta(microseconds=int(value)) for value in values]) + return CFTimeIndex(dates, name=name) + + +def _total_microseconds(delta): + """Compute the total number of microseconds of a datetime.timedelta. + + Parameters + ---------- + delta : datetime.timedelta + Input timedelta. 
+ + Returns + ------- + int + """ + return delta / timedelta(microseconds=1) + + +def _floor_int(values, unit): + """Copied from pandas.""" + return values - np.remainder(values, unit) + + +def _ceil_int(values, unit): + """Copied from pandas.""" + return values + np.remainder(-values, unit) + + +def _round_to_nearest_half_even(values, unit): + """Copied from pandas.""" + if unit % 2: + return _ceil_int(values - unit // 2, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index c407371f9f0..de0e332b26c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -78,20 +78,27 @@ def _get_date_field(values, name, dtype): return access_method(values, name) -def _round_series(values, name, freq): - """Coerce an array of datetime-like values to a pandas Series and - apply requested rounding +def _round_through_series_or_index(values, name, freq): + """Coerce an array of datetime-like values to a pandas Series or xarray + CFTimeIndex and apply requested rounding """ - values_as_series = pd.Series(values.ravel()) - method = getattr(values_as_series.dt, name) + from ..coding.cftimeindex import CFTimeIndex + + if is_np_datetime_like(values.dtype): + values_as_series = pd.Series(values.ravel()) + method = getattr(values_as_series.dt, name) + else: + values_as_cftimeindex = CFTimeIndex(values.ravel()) + method = getattr(values_as_cftimeindex, name) + field_values = method(freq=freq).values return field_values.reshape(values.shape) def _round_field(values, name, freq): - """Indirectly access pandas rounding functions by wrapping data - as a Series and calling through `.dt` attribute. + """Indirectly access rounding functions by wrapping data + as a Series or CFTimeIndex Parameters ---------- @@ -110,9 +117,12 @@ def _round_field(values, name, freq): if isinstance(values, dask_array_type): from dask.array import map_blocks - return map_blocks(_round_series, values, name, freq=freq, dtype=np.datetime64) + dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") + return map_blocks( + _round_through_series_or_index, values, name, freq=freq, dtype=dtype + ) else: - return _round_series(values, name, freq) + return _round_through_series_or_index(values, name, freq) def _strftime_through_cftimeindex(values, date_format): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index f178720a6e1..1a8a2732eeb 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -7,6 +7,7 @@ from . 
import ( assert_array_equal, assert_equal, + assert_identical, raises_regex, requires_cftime, requires_dask, @@ -435,3 +436,106 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) + + +@pytest.fixture +def cftime_rounding_dataarray(cftime_date_type): + return xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 1), cftime_date_type(1, 1, 1, 15)], + [cftime_date_type(1, 1, 1, 23), cftime_date_type(1, 1, 2, 1)], + ] + ) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_floor_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 1, 0)], + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="floor", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.floor(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.floor(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_ceil_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 3, 0)], + ], + name="ceil", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.ceil(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.ceil(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_round_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="round", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). 
+ with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.round(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.round(freq) + + assert_identical(result, expected) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8025766529e..8d83b833ca3 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -904,3 +904,92 @@ def test_multiindex(): index = xr.cftime_range("2001-01-01", periods=100, calendar="360_day") mindex = pd.MultiIndex.from_arrays([index]) assert mindex.get_loc("2001-01") == slice(0, 30) + + +@requires_cftime +@pytest.mark.parametrize("freq", ["3663S", "33T", "2H"]) +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_against_datetimeindex(freq, method): + expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777S") + expected = getattr(expected, method)(freq) + result = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + result = getattr(result, method)(freq).to_datetimeindex() + assert result.equals(expected) + + +@requires_cftime +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_invalid_freq(method): + index = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + with pytest.raises(ValueError, match="fixed"): + getattr(index, method)("MS") + + +@pytest.fixture +def rounding_index(date_type): + return xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 999512), + date_type(1, 1, 1, 3, 0, 1, 500001), + date_type(1, 1, 1, 7, 0, 6, 499999), + ] + ) + + +@requires_cftime +def test_ceil(rounding_index, date_type): + result = rounding_index.ceil("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 7, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_floor(rounding_index, date_type): + result = rounding_index.floor("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 0), + date_type(1, 1, 1, 3, 0, 1, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_round(rounding_index, date_type): + result = rounding_index.round("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_asi8(date_type): + index = xr.CFTimeIndex([date_type(1970, 1, 1), date_type(1970, 1, 2)]) + result = index.asi8 + expected = 1000000 * 86400 * np.array([0, 1]) + np.testing.assert_array_equal(result, expected) + + +@requires_cftime +def test_asi8_distant_date(): + """Test that asi8 conversion is truly exact.""" + import cftime + + date_type = cftime.DatetimeProlepticGregorian + index = xr.CFTimeIndex([date_type(10731, 4, 22, 3, 25, 45, 123456)]) + result = index.asi8 + expected = np.array([1000000 * 86400 * 400 * 8000 + 12345 * 1000000 + 123456]) + np.testing.assert_array_equal(result, expected) From 8512b7bf498c0c300f146447c0b05545842e9404 Mon Sep 17 00:00:00 2001 From: niowniow Date: Mon, 2 Mar 2020 13:19:16 +0100 Subject: [PATCH 05/24] Fix zarr append with groups (#3610) * bug fixed and added zarr group tests * black . 
* added info to whats-new Co-authored-by: Ryan Abernathey --- doc/whats-new.rst | 2 ++ xarray/backends/zarr.py | 4 ++-- xarray/tests/test_backends.py | 35 ++++++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 579719cb8d7..2cc92c78ac8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,6 +56,8 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. +- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` + simultaneously. (:issue:`3170`). By `Matthias Meyer `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 763769dac74..2469a31a3d9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -373,7 +373,7 @@ def store( if len(existing_variables) > 0: # there are variables to append # their encoding must be the same as in the store - ds = open_zarr(self.ds.store, chunks=None) + ds = open_zarr(self.ds.store, group=self.ds.path, chunks=None) variables_with_encoding = {} for vn in existing_variables: variables_with_encoding[vn] = variables[vn].copy(deep=False) @@ -487,7 +487,7 @@ def open_zarr( directory in file system where a Zarr DirectoryStore has been stored. synchronizer : object, optional Array synchronizer provided to zarr - group : str, obtional + group : str, optional Group path. (a.k.a. `path` in zarr terminology.) chunks : int or dict or tuple or {None, 'auto'}, optional Chunk sizes along each dimension, e.g., ``5`` or diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b7ba70ef6c4..015d2cbfdeb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1729,39 +1729,52 @@ def test_hidden_zarr_keys(self): pass @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") - def test_write_persistence_modes(self): + @pytest.mark.parametrize("group", [None, "group1"]) + def test_write_persistence_modes(self, group): original = create_test_data() # overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # don't overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w-"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w-", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # make sure overwriting works as expected with self.create_zarr_target() as store: self.save(original, store) # should overwrite with no error - self.save(original, store, mode="w") - with self.open(store) as actual: + self.save(original, store, mode="w", group=group) + with self.open(store, group=group) as actual: assert_identical(original, actual) with pytest.raises(ValueError): self.save(original, store, mode="w-") # check append mode for normal write - with self.roundtrip(original, save_kwargs={"mode": "a"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "a", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) - ds, ds_to_append, _ = create_append_test_data() - # check append mode for append write + ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") - 
ds_to_append.to_zarr(store_target, append_dim="time") + ds.to_zarr(store_target, mode="w", group=group) + ds_to_append.to_zarr(store_target, append_dim="time", group=group) original = xr.concat([ds, ds_to_append], dim="time") - assert_identical(original, xr.open_zarr(store_target)) + actual = xr.open_zarr(store_target, group=group) + assert_identical(original, actual) def test_compressor_encoding(self): original = create_test_data() From b155853ff6e17172b1b6b16c0da31522718e9409 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 2 Mar 2020 18:01:43 -0500 Subject: [PATCH 06/24] Turn on html repr by default (#3812) * Turn on html repr by default * Add By line to release docs * Change tests to expect html as the default display_style --- doc/whats-new.rst | 5 +++++ xarray/core/options.py | 2 +- xarray/tests/test_options.py | 22 ++++++++++++---------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2cc92c78ac8..151ba917cce 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,11 @@ New Features often means a user is attempting to pass multiple dimensions to group over and should instead pass a list. By `Maximilian Roos `_ +- The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced + in 0.14.1) is now on by default. To disable, use + ``xarray.set_options(display_style="text")``. + By `Julia Signell `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/options.py b/xarray/core/options.py index 72f9ad8e1fa..15d05159d6d 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -20,7 +20,7 @@ CMAP_SEQUENTIAL: "viridis", CMAP_DIVERGENT: "RdBu_r", KEEP_ATTRS: "default", - DISPLAY_STYLE: "text", + DISPLAY_STYLE: "html", } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index f155acbf494..19f74476ced 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -68,12 +68,12 @@ def test_nested_options(): def test_display_style(): - original = "text" + original = "html" assert OPTIONS["display_style"] == original with pytest.raises(ValueError): xarray.set_options(display_style="invalid_str") - with xarray.set_options(display_style="html"): - assert OPTIONS["display_style"] == "html" + with xarray.set_options(display_style="text"): + assert OPTIONS["display_style"] == "text" assert OPTIONS["display_style"] == original @@ -177,10 +177,11 @@ def test_merge_attr_retention(self): def test_display_style_text(self): ds = create_test_dataset_attrs() - text = ds._repr_html_() - assert text.startswith("
")
-        assert "'nested'" in text
-        assert "<xarray.Dataset>" in text
+        with xarray.set_options(display_style="text"):
+            text = ds._repr_html_()
+            assert text.startswith("<pre>")
+            assert "&#x27;nested&#x27;" in text
+            assert "&lt;xarray.Dataset&gt;" in text
 
     def test_display_style_html(self):
         ds = create_test_dataset_attrs()
@@ -191,9 +192,10 @@ def test_display_style_html(self):
 
     def test_display_dataarray_style_text(self):
         da = create_test_dataarray_attrs()
-        text = da._repr_html_()
-        assert text.startswith("<pre>")
-        assert "&lt;xarray.DataArray &#x27;var1&#x27;" in text
+        with xarray.set_options(display_style="text"):
+            text = da._repr_html_()
+            assert text.startswith("<pre>")
+            assert "&lt;xarray.DataArray &#x27;var1&#x27;" in text
 
     def test_display_dataarray_style_html(self):
         da = create_test_dataarray_attrs()

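A quick sketch of what the patch above changes: the rich HTML repr is now the notebook default, and ``display_style`` switches back to plain text (the dataset contents are illustrative):

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])})

    # Opt back into the previous plain-text repr globally ...
    xr.set_options(display_style="text")

    # ... or temporarily, since set_options doubles as a context manager.
    with xr.set_options(display_style="html"):
        ds._repr_html_()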
From 1c5e1cd022a0ff91275c50a50d1c6f88a7abff7d Mon Sep 17 00:00:00 2001
From: Andrew Thomas 
Date: Mon, 2 Mar 2020 18:02:55 -0500
Subject: [PATCH 07/24] Coarsen keep attrs 3376 (#3801)

* Add test of DataWithCoords.coarsen() for #3376

* Add test of Variable.coarsen() for #3376

* Add keep_attrs kwarg to DataWithCoords.coarsen() for #3376

* Style and spelling fixes (#3376)

* Fix test_coarsen_keep_attrs by removing self from input

* Pass keep_attrs through to _coarsen_cls and _rolling_cls returns (#3376)

* Move keyword from coarsen to mean in test_coarsen_keep_attrs

* Start handling keep_attrs in rolling class constructors (#3376)

* Update Coarsen constructor and DatasetCoarsen class method (GH3376)

Assign keep_attrs keyword value to Coarsen objects in constructor
Add conditional inside _reduce_method.wrapped_func branching on self.keep_attrs and pass back to returned Dataset

* Incorporate code review from @max-sixty

* Fix Dataset.coarsen and Variable.coarsen for GH3376

Handle global keep_attrs setting inside Variable._coarsen_reshape

Pass attrs through consistently inside DatasetCoarsen._reduce_method

Don't pass Variable.coarsen a keyword argument it doesn't expect inside DataArrayCoarsen._reduce_method

* Update tests for GH3376

* Incorporate review changes to test_dataset for GH3376

Remove commented-out test from test_coarsen_keep_attrs

Add test_rolling_keep_attrs

* Change Rolling._dataset_implementation for GH3376

Return a Dataset object that results in test_rolling_keep_attrs passing

* style fixes

* Remove duplicate variable assignment and document change (GH3376)
---
 doc/whats-new.rst             |  5 +++
 xarray/core/common.py         | 29 +++++++++++++--
 xarray/core/rolling.py        | 67 ++++++++++++++++++++++++++++-------
 xarray/core/variable.py       |  3 ++
 xarray/tests/test_dataset.py  | 56 +++++++++++++++++++++++++++++
 xarray/tests/test_variable.py | 22 +++++++++++-
 6 files changed, 165 insertions(+), 17 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 151ba917cce..089cbbe1be3 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -61,6 +61,11 @@ Bug fixes
 - xarray now respects the over, under and bad colors if set on a provided colormap.
   (:issue:`3590`, :pull:`3601`)
   By `johnomotani `_.
+- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)``
+  to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword
+  argument ``keep_attrs`` to change this setting. (:issue:`3376`,
+  :pull:`3801`) By `Andrew Thomas `_.
+
 - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group`
   simultaneously. (:issue:`3170`). By `Matthias Meyer `_.
 
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 582ae310061..e3739d6d039 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -753,6 +753,7 @@ def rolling(
         dim: Mapping[Hashable, int] = None,
         min_periods: int = None,
         center: bool = False,
+        keep_attrs: bool = None,
         **window_kwargs: int,
     ):
         """
@@ -769,6 +770,10 @@ def rolling(
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
         **window_kwargs : optional
             The keyword arguments form of ``dim``.
             One of dim or window_kwargs must be provided.
@@ -810,8 +815,13 @@ def rolling(
         core.rolling.DataArrayRolling
         core.rolling.DatasetRolling
         """
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+
         dim = either_dict_or_kwargs(dim, window_kwargs, "rolling")
-        return self._rolling_cls(self, dim, min_periods=min_periods, center=center)
+        return self._rolling_cls(
+            self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs
+        )
 
     def rolling_exp(
         self,
@@ -859,6 +869,7 @@ def coarsen(
         boundary: str = "exact",
         side: Union[str, Mapping[Hashable, str]] = "left",
         coord_func: str = "mean",
+        keep_attrs: bool = None,
         **window_kwargs: int,
     ):
         """
@@ -879,8 +890,12 @@ def coarsen(
             multiple of the window size. If 'trim', the excess entries are
             dropped. If 'pad', NA will be padded.
         side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
-        coord_func : function (name) that is applied to the coordintes,
+        coord_func : function (name) that is applied to the coordinates,
             or a mapping from coordinate name to function (name).
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -915,9 +930,17 @@ def coarsen(
         core.rolling.DataArrayCoarsen
         core.rolling.DatasetCoarsen
         """
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+
         dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen")
         return self._coarsen_cls(
-            self, dim, boundary=boundary, side=side, coord_func=coord_func
+            self,
+            dim,
+            boundary=boundary,
+            side=side,
+            coord_func=coord_func,
+            keep_attrs=keep_attrs,
         )
 
     def resample(
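To make the new keyword concrete, here is a condensed sketch of the rolling behaviour pinned down by the tests added later in this patch (data values are illustrative):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"var1": ("coord", np.linspace(10, 15, 100))},
        coords={"coord": np.linspace(1, 10, 100)},
        attrs={"units": "test"},
    )

    # Attributes are dropped by default ...
    assert ds.rolling(coord=5).mean().attrs == {}

    # ... and kept via the keyword or the global option.
    assert ds.rolling(coord=5, keep_attrs=True).mean().attrs == {"units": "test"}
    with xr.set_options(keep_attrs=True):
        assert ds.rolling(coord=5).mean().attrs == {"units": "test"}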
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index ea6d72b2e03..61178cfb15f 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -7,6 +7,7 @@
 from . import dtypes, duck_array_ops, utils
 from .dask_array_ops import dask_rolling_wrapper
 from .ops import inject_reduce_methods
+from .options import _get_keep_attrs
 from .pycompat import dask_array_type
 
 try:
@@ -42,10 +43,10 @@ class Rolling:
     DataArray.rolling
     """
 
-    __slots__ = ("obj", "window", "min_periods", "center", "dim")
-    _attributes = ("window", "min_periods", "center", "dim")
+    __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs")
+    _attributes = ("window", "min_periods", "center", "dim", "keep_attrs")
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object.
 
@@ -65,6 +66,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -89,6 +94,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         self.center = center
         self.dim = dim
 
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+        self.keep_attrs = keep_attrs
+
     @property
     def _min_periods(self):
         return self.min_periods if self.min_periods is not None else self.window
@@ -143,7 +152,7 @@ def count(self):
 class DataArrayRolling(Rolling):
     __slots__ = ("window_labels",)
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object for DataArray.
         You should use DataArray.rolling() method to construct this object
@@ -165,6 +174,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -177,7 +190,11 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         Dataset.rolling
         Dataset.groupby
         """
-        super().__init__(obj, windows, min_periods=min_periods, center=center)
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+        super().__init__(
+            obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs
+        )
 
         self.window_labels = self.obj[self.dim]
 
@@ -374,7 +391,7 @@ def _numpy_or_bottleneck_reduce(
 class DatasetRolling(Rolling):
     __slots__ = ("rollings",)
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object for Dataset.
         You should use Dataset.rolling() method to construct this object
@@ -396,6 +413,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -408,7 +429,7 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         Dataset.groupby
         DataArray.groupby
         """
-        super().__init__(obj, windows, min_periods, center)
+        super().__init__(obj, windows, min_periods, center, keep_attrs)
         if self.dim not in self.obj.dims:
             raise KeyError(self.dim)
         # Keep each Rolling object as a dictionary
@@ -416,7 +437,9 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         for key, da in self.obj.data_vars.items():
             # keeps rollings only for the dataset depending on slf.dim
             if self.dim in da.dims:
-                self.rollings[key] = DataArrayRolling(da, windows, min_periods, center)
+                self.rollings[key] = DataArrayRolling(
+                    da, windows, min_periods, center, keep_attrs
+                )
 
     def _dataset_implementation(self, func, **kwargs):
         from .dataset import Dataset
@@ -427,7 +450,8 @@ def _dataset_implementation(self, func, **kwargs):
                 reduced[key] = func(self.rollings[key], **kwargs)
             else:
                 reduced[key] = self.obj[key]
-        return Dataset(reduced, coords=self.obj.coords)
+        attrs = self.obj.attrs if self.keep_attrs else {}
+        return Dataset(reduced, coords=self.obj.coords, attrs=attrs)
 
     def reduce(self, func, **kwargs):
         """Reduce the items in this group by applying `func` along some
@@ -466,7 +490,7 @@ def _numpy_or_bottleneck_reduce(
             **kwargs,
         )
 
-    def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
+    def construct(self, window_dim, stride=1, fill_value=dtypes.NA, keep_attrs=None):
         """
         Convert this rolling object to xr.Dataset,
         where the window dimension is stacked as a new dimension
@@ -487,6 +511,9 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
 
         from .dataset import Dataset
 
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=True)
+
         dataset = {}
         for key, da in self.obj.data_vars.items():
             if self.dim in da.dims:
@@ -509,10 +536,18 @@ class Coarsen:
     DataArray.coarsen
     """
 
-    __slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess")
+    __slots__ = (
+        "obj",
+        "boundary",
+        "coord_func",
+        "windows",
+        "side",
+        "trim_excess",
+        "keep_attrs",
+    )
     _attributes = ("windows", "side", "trim_excess")
 
-    def __init__(self, obj, windows, boundary, side, coord_func):
+    def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs):
         """
         Moving window object.
 
@@ -541,6 +576,7 @@ def __init__(self, obj, windows, boundary, side, coord_func):
         self.windows = windows
         self.side = side
         self.boundary = boundary
+        self.keep_attrs = keep_attrs
 
         absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
         if absent_dims:
@@ -626,6 +662,11 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
         def wrapped_func(self, **kwargs):
             from .dataset import Dataset
 
+            if self.keep_attrs:
+                attrs = self.obj.attrs
+            else:
+                attrs = {}
+
             reduced = {}
             for key, da in self.obj.data_vars.items():
                 reduced[key] = da.variable.coarsen(
@@ -644,7 +685,7 @@ def wrapped_func(self, **kwargs):
                     )
                 else:
                     coords[c] = v.variable
-            return Dataset(reduced, coords=coords)
+            return Dataset(reduced, coords=coords, attrs=attrs)
 
         return wrapped_func
 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index daa8678157b..62f9fde6a2e 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1949,6 +1949,9 @@ def _coarsen_reshape(self, windows, boundary, side):
             else:
                 shape.append(variable.shape[i])
 
+        keep_attrs = _get_keep_attrs(default=False)
+        variable.attrs = variable._attrs if keep_attrs else {}
+
         return variable.data.reshape(shape), tuple(axes)
 
     @property
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 5e254c37e44..7bcf9379ae8 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -5664,6 +5664,62 @@ def test_coarsen_coords_cftime():
     np.testing.assert_array_equal(actual.time, expected_times)
 
 
+def test_coarsen_keep_attrs():
+    _attrs = {"units": "test", "long_name": "testing"}
+
+    var1 = np.linspace(10, 15, 100)
+    var2 = np.linspace(5, 10, 100)
+    coords = np.linspace(1, 10, 100)
+
+    ds = Dataset(
+        data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
+        coords={"coord": coords},
+        attrs=_attrs,
+    )
+
+    # Test dropped attrs
+    dat = ds.coarsen(coord=5).mean()
+    assert dat.attrs == {}
+
+    # Test kept attrs using dataset keyword
+    dat = ds.coarsen(coord=5, keep_attrs=True).mean()
+    assert dat.attrs == _attrs
+
+    # Test kept attrs using global option
+    with set_options(keep_attrs=True):
+        dat = ds.coarsen(coord=5).mean()
+    assert dat.attrs == _attrs
+
+
+def test_rolling_keep_attrs():
+    _attrs = {"units": "test", "long_name": "testing"}
+
+    var1 = np.linspace(10, 15, 100)
+    var2 = np.linspace(5, 10, 100)
+    coords = np.linspace(1, 10, 100)
+
+    ds = Dataset(
+        data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
+        coords={"coord": coords},
+        attrs=_attrs,
+    )
+
+    # Test dropped attrs
+    dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
+    assert dat.attrs == {}
+
+    # Test kept attrs using dataset keyword
+    dat = ds.rolling(
+        dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True
+    ).mean()
+    assert dat.attrs == _attrs
+
+    # Test kept attrs using global option
+    with set_options(keep_attrs=True):
+        dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
+    assert dat.attrs == _attrs
+
+
 def test_rolling_properties(ds):
     # catching invalid args
     with pytest.raises(ValueError, match="exactly one dim/window should"):
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 62fde920b1e..c86ecd0121f 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -9,7 +9,7 @@
 import pytz
 
 from xarray import Coordinate, Dataset, IndexVariable, Variable, set_options
-from xarray.core import dtypes, indexing
+from xarray.core import dtypes, duck_array_ops, indexing
 from xarray.core.common import full_like, ones_like, zeros_like
 from xarray.core.indexing import (
     BasicIndexer,
@@ -1879,6 +1879,26 @@ def test_coarsen_2d(self):
         expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
         assert_equal(actual, expected)
 
+    # perhaps @pytest.mark.parametrize("operation", [f for f in duck_array_ops])
+    def test_coarsen_keep_attrs(self, operation="mean"):
+        _attrs = {"units": "test", "long_name": "testing"}
+
+        test_func = getattr(duck_array_ops, operation, None)
+
+        # Test dropped attrs
+        with set_options(keep_attrs=False):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == {}
+
+        # Test kept attrs
+        with set_options(keep_attrs=True):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == _attrs
+
 
 @requires_dask
 class TestVariableWithDask(VariableSubclassobjects):
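The coarsen half of the change, condensed from test_coarsen_keep_attrs above into a standalone sketch (same illustrative data):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"var1": ("coord", np.linspace(10, 15, 100))},
        coords={"coord": np.linspace(1, 10, 100)},
        attrs={"units": "test"},
    )

    assert ds.coarsen(coord=5).mean().attrs == {}  # dropped by default
    assert ds.coarsen(coord=5, keep_attrs=True).mean().attrs == {"units": "test"}
    with xr.set_options(keep_attrs=True):  # global opt-in
        assert ds.coarsen(coord=5).mean().attrs == {"units": "test"}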

From a333a5c73db078fa34324475f9d74d71d74d4659 Mon Sep 17 00:00:00 2001
From: Sander 
Date: Tue, 3 Mar 2020 01:38:04 +0100
Subject: [PATCH 08/24] =?UTF-8?q?removed=20mention=20that=20'dims'=20are?=
 =?UTF-8?q?=20inferred=20from=20'coords'-dict=20when=20omit=E2=80=A6=20(#3?=
 =?UTF-8?q?821)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* removed mention that 'dims' are inferred from 'coords'-dict when omitted in DataArray (fixes #3820)

* added summary of PR #3821 to whats-new
---
 doc/whats-new.rst        | 3 +++
 xarray/core/dataarray.py | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 089cbbe1be3..4a6083522ba 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -71,6 +71,9 @@ Bug fixes
 
 Documentation
 ~~~~~~~~~~~~~
+- Fix documentation of :py:class:`DataArray` removing the deprecated mention
+  that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`)
+  By `Sander van Rijn `_.
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 062cc6342df..b1da0ca1448 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -304,8 +304,7 @@ def __init__(
             Name(s) of the data dimension(s). Must be either a hashable (only
             for 1D data) or a sequence of hashables with length equal to the
             number of dimensions. If this argument is omitted, dimension names
-            are taken from ``coords`` (if possible) and otherwise default to
-            ``['dim_0', ... 'dim_n']``.
+            default to ``['dim_0', ... 'dim_n']``.
         name : str or None, optional
             Name of this array.
         attrs : dict_like or None, optional

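A small sketch of the behaviour the corrected docstring describes: dimension names are passed explicitly and are not inferred from a coords dict:

    import xarray as xr

    # Names are spelled out via `dims` ...
    da = xr.DataArray(
        [[1, 2], [3, 4]],
        coords={"x": [10, 20], "y": [1, 2]},
        dims=("x", "y"),
    )

    # ... and default to dim_0, dim_1, ... when omitted.
    unnamed = xr.DataArray([[1, 2], [3, 4]])
    assert unnamed.dims == ("dim_0", "dim_1")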
From 01462d65c7213e5e1cddf36492c6a34a7e53ce55 Mon Sep 17 00:00:00 2001
From: dcherian 
Date: Wed, 4 Mar 2020 07:05:14 +0530
Subject: [PATCH 09/24] Use stable RTD image.

---
 readthedocs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs.yml b/readthedocs.yml
index 88aee82a44b..173d61ec6f3 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -1,7 +1,7 @@
 version: 2
 
 build:
-    image: latest
+    image: stable
 
 conda:
     environment: ci/requirements/doc.yml

From b2f06cb9d36a2520fa4f3aee6c38cae9972e702e Mon Sep 17 00:00:00 2001
From: Deepak Cherian 
Date: Thu, 5 Mar 2020 18:26:11 +0530
Subject: [PATCH 10/24] DOC: Add rioxarray and other external examples (#3757)

* DOC: Add rioxarray link to examples and add example in file IO

* Add more external examples.

* fix spacing for ipython docs

* minor fixes

* fix bad edit

Co-authored-by: Deepak Cherian 
---
 doc/examples.rst |  9 +++++++++
 doc/io.rst       | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/doc/examples.rst b/doc/examples.rst
index 3067ca824be..805395808e0 100644
--- a/doc/examples.rst
+++ b/doc/examples.rst
@@ -17,3 +17,12 @@ Using apply_ufunc
     :maxdepth: 2
 
     examples/apply_ufunc_vectorize_1d
+
+External Examples
+-----------------
+.. toctree::
+    :maxdepth: 2
+
+    Managing raster data with rioxarray 
+    Xarray with dask 
+    Xarray and dask on the cloud with Pangeo 
diff --git a/doc/io.rst b/doc/io.rst
index e910943236f..6064aa3568a 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -759,9 +759,53 @@ for an example of how to convert these to longitudes and latitudes.
     considered as being experimental. Please report any bug you may find
     on xarray's github repository.
 
+
+Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other
+GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF.
+`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping.
+
+.. ipython::
+    :verbatim:
+
+    In [1]: import rioxarray
+
+    In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif')
+
+    In [3]: rds
+    Out[3]:
+    <xarray.DataArray (band: 3, y: 718, x: 791)>
+    [1703814 values with dtype=uint8]
+    Coordinates:
+      * band         (band) int64 1 2 3
+      * y            (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06
+      * x            (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05
+        spatial_ref  int64 0
+    Attributes:
+        STATISTICS_MAXIMUM:  255
+        STATISTICS_MEAN:     29.947726688477
+        STATISTICS_MINIMUM:  0
+        STATISTICS_STDDEV:   52.340921626611
+        transform:           (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827...
+        _FillValue:          0.0
+        scale_factor:        1.0
+        add_offset:          0.0
+        grid_mapping:        spatial_ref
+
+    In [4]: rds.rio.crs
+    Out[4]: CRS.from_epsg(32618)
+
+    In [5]: rds4326 = rds.rio.reproject("epsg:4326")
+
+    In [6]: rds4326.rio.crs
+    Out[6]: CRS.from_epsg(4326)
+
+    In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif')
+
+
 .. _rasterio: https://rasterio.readthedocs.io/en/latest/
+.. _rioxarray: https://corteva.github.io/rioxarray/stable/
 .. _test files: https://github.com/mapbox/rasterio/blob/master/tests/data/RGB.byte.tif
-.. _pyproj: https://github.com/jswhit/pyproj
+.. _pyproj: https://github.com/pyproj4/pyproj
 
 .. _io.zarr:
 

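Beyond the reprojection round trip shown in the diff, the clipping mentioned there looks roughly like the following; this is a sketch assuming a rioxarray version that provides ``rio.clip``, and the polygon coordinates are made up for illustration:

    import rioxarray  # registers the .rio accessor

    rds = rioxarray.open_rasterio("RGB.byte.tif")

    # A GeoJSON-like polygon in the raster's own CRS (illustrative numbers).
    geometry = {
        "type": "Polygon",
        "coordinates": [[
            [125000.0, 2650000.0],
            [300000.0, 2650000.0],
            [300000.0, 2800000.0],
            [125000.0, 2800000.0],
            [125000.0, 2650000.0],
        ]],
    }
    clipped = rds.rio.clip([geometry], rds.rio.crs)
    clipped.rio.to_raster("RGB.byte.clipped.tif")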
From 8fb47f282555fd1430b9621abedbed82cdac7d4a Mon Sep 17 00:00:00 2001
From: Deepak Cherian 
Date: Thu, 5 Mar 2020 18:26:54 +0530
Subject: [PATCH 11/24] Add note on diff's n differing from pandas (#3822)

* note that n != periods in diff docstring

* better wording based on feedback
---
 xarray/core/dataarray.py | 6 ++++++
 xarray/core/dataset.py   | 5 +++++
 2 files changed, 11 insertions(+)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index b1da0ca1448..4e80ef222c2 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2692,6 +2692,12 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr
         difference : same type as caller
             The n-th order finite difference of this object.
 
+        .. note::
+
+            `n` matches numpy's behavior and is different from pandas' first
+            argument named `periods`.
+
+
         Examples
         --------
         >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ['x'])
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 7252dd2f3df..52940e98b27 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4879,6 +4879,11 @@ def diff(self, dim, n=1, label="upper"):
         difference : same type as caller
             The n-th order finite difference of this object.
 
+        .. note::
+
+            `n` matches numpy's behavior and is different from pandas' first
+            argument named `periods`.
+
         Examples
         --------
         >>> ds = xr.Dataset({'foo': ('x', [5, 5, 6, 6])})

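The distinction the new note draws, made concrete (the xarray values follow from the docstring example above):

    import pandas as pd
    import xarray as xr

    arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ["x"])

    # In xarray (and numpy), n is the *order* of the difference,
    # i.e. a first difference applied n times.
    arr.diff("x", n=2).values  # array([ 1, -1])

    # In pandas, the first argument is a shift distance instead.
    pd.Series([5, 5, 6, 6]).diff(periods=2).values  # array([nan, nan, 1., 1.])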
From 69723ebf34cb9c37917b44b2ac1ab92ae553fecc Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Thu, 5 Mar 2020 22:36:07 -0500
Subject: [PATCH 12/24] Label "Installed Versions" item in Issue template
 (#3832)

* Label Installed Versions details in GH Issue template

* Update bug_report.md
---
 .github/ISSUE_TEMPLATE/bug_report.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index df5b2304bc3..83c3aea53a8 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -25,8 +25,9 @@ assignees: ''
 
 
 
-#### Output of ``xr.show_versions()``
-
+#### Versions
+
+<details><summary>
Output of `xr.show_versions()`
From 00e5b367c483656c67c63c47a2a9e07112bbc885 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 6 Mar 2020 08:57:17 -0500 Subject: [PATCH 13/24] update macos image (#3838) * update macos image * whatsnew --- azure-pipelines.yml | 2 +- doc/whats-new.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5789161c966..ce95fca1ba1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -38,7 +38,7 @@ jobs: py38: conda_env: py38 pool: - vmImage: 'macOS-10.13' + vmImage: 'macOS-10.15' steps: - template: ci/azure/unit-tests.yml diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4a6083522ba..99ee66fad67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -86,6 +86,8 @@ Internal Changes - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. +- Updated Azure CI MacOS image, given pending removal. + By `Maximilian Roos `_ .. _whats-new.0.15.0: From 9fbb4170c1732fe2f3cd57b2b96d770a5bac50ed Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 6 Mar 2020 23:38:11 -0500 Subject: [PATCH 14/24] Allow `where` to receive a callable (#3827) * allow where to receive a callable * Update xarray/core/common.py Co-Authored-By: keewis * docstring * whatsnew Co-authored-by: keewis --- doc/whats-new.rst | 4 +++- xarray/core/common.py | 22 ++++++++++++++++++++++ xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 9 +++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 99ee66fad67..24120270444 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,7 +43,9 @@ New Features in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. By `Julia Signell `_. - +- :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a + first argument, which is then called on the input; replicating pandas' behavior. 
+ By `Maximilian Roos `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index e3739d6d039..c80cb24c5b5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1119,6 +1119,15 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): >>> import numpy as np >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=('x', 'y')) + >>> a + + array([[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24]]) + Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4) array([[ 0., 1., 2., 3., nan], @@ -1127,6 +1136,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan, nan], [ nan, nan, nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 5, -1) array([[ 0, 1, 2, 3, 4], @@ -1135,6 +1145,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [15, 16, -1, -1, -1], [20, -1, -1, -1, -1]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4, drop=True) array([[ 0., 1., 2., 3.], @@ -1143,6 +1154,14 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(lambda x: x.x + x.y < 4, drop=True) + + array([[ 0., 1., 2., 3.], + [ 5., 6., 7., nan], + [ 10., 11., nan, nan], + [ 15., nan, nan, nan]]) + Dimensions without coordinates: x, y + See also -------- numpy.where : corresponding numpy function @@ -1152,6 +1171,9 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): from .dataarray import DataArray from .dataset import Dataset + if callable(cond): + cond = cond(self) + if drop: if other is not dtypes.NA: raise ValueError("cannot set `other` if drop=True") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 0a622d279ba..b8a9c5edaf9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2215,6 +2215,12 @@ def test_where(self): actual = arr.where(arr.x < 2, drop=True) assert_identical(actual, expected) + def test_where_lambda(self): + arr = DataArray(np.arange(4), dims="y") + expected = arr.sel(y=slice(2)) + actual = arr.where(lambda x: x.y < 2, drop=True) + assert_identical(actual, expected) + def test_where_string(self): array = DataArray(["a", "b"]) expected = DataArray(np.array(["a", np.nan], dtype=object)) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 7bcf9379ae8..44ffafb23b1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4349,6 +4349,12 @@ def test_where(self): assert actual.a.name == "a" assert actual.a.attrs == ds.a.attrs + # lambda + ds = Dataset({"a": ("x", range(5))}) + expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) + actual = ds.where(lambda x: x > 1) + assert_identical(expected, actual) + def test_where_other(self): ds = Dataset({"a": ("x", range(5))}, {"x": range(5)}) expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)}) @@ -4356,6 +4362,9 @@ def test_where_other(self): assert_equal(expected, actual) assert actual.a.dtype == int + actual = ds.where(lambda x: x > 1, -1) + assert_equal(expected, actual) + with raises_regex(ValueError, "cannot set"): ds.where(ds > 1, other=0, drop=True) From cdaac64fa528222d947bbc821ac6c919f7fa7fa8 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Sun, 8 Mar 2020 18:42:43 +0100 Subject: [PATCH 15/24] Implement skipna kwarg in xr.quantile (#3844) * quick fix, no docs, no tests * added tests * docstrings * added whatsnew * Update 
doc/whats-new.rst Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Update doc/whats-new.rst Co-Authored-By: keewis Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: keewis --- doc/whats-new.rst | 7 ++++++- xarray/core/dataarray.py | 11 +++++++++-- xarray/core/dataset.py | 13 +++++++++++-- xarray/core/groupby.py | 9 +++++++-- xarray/core/variable.py | 8 ++++++-- xarray/tests/test_dataarray.py | 8 +++++--- xarray/tests/test_dataset.py | 24 ++++++++++++++++++++---- xarray/tests/test_variable.py | 8 +++++--- 8 files changed, 69 insertions(+), 19 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 24120270444..2c30db99bcd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,7 +45,12 @@ New Features By `Julia Signell `_. - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. - By `Maximilian Roos `_ + By `Maximilian Roos `_. +- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, + :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` + (:issue:`3843`, :pull:`3844`) + By `Aaron Spring `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4e80ef222c2..7fcb42bf9d2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2939,6 +2939,7 @@ def quantile( dim: Union[Hashable, Sequence[Hashable], None] = None, interpolation: str = "linear", keep_attrs: bool = None, + skipna: bool = True, ) -> "DataArray": """Compute the qth quantile of the data along the specified dimension. @@ -2966,6 +2967,8 @@ def quantile( If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -2978,7 +2981,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile Examples -------- @@ -3015,7 +3018,11 @@ def quantile( """ ds = self._to_temp_dataset().quantile( - q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation + q, + dim=dim, + keep_attrs=keep_attrs, + interpolation=interpolation, + skipna=skipna, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 52940e98b27..f286236dd45 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5140,7 +5140,13 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile( - self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None + self, + q, + dim=None, + interpolation="linear", + numeric_only=False, + keep_attrs=None, + skipna=True, ): """Compute the qth quantile of the data along the specified dimension. @@ -5171,6 +5177,8 @@ def quantile( object will be returned without attributes. numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. + skipna : bool, optional + Whether to skip missing values when aggregating. 
Returns ------- @@ -5183,7 +5191,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, DataArray.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, DataArray.quantile Examples -------- @@ -5258,6 +5266,7 @@ def quantile( dim=reduce_dims, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) else: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index f2a9ebac6eb..4223d9dc255 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,7 +558,9 @@ def fillna(self, value): out = ops.fillna(self, value) return out - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -582,6 +584,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -595,7 +599,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile Examples @@ -656,6 +660,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): dim=dim, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) return out diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 62f9fde6a2e..435edb6f014 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1678,7 +1678,9 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """ return self.broadcast_equals(other, equiv=equiv) - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1725,6 +1727,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): from .computation import apply_ufunc + _quantile_func = np.nanquantile if skipna else np.quantile + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -1739,7 +1743,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): def _wrapper(npa, **kwargs): # move quantile axis to end. 
required for apply_ufunc - return np.moveaxis(np.nanquantile(npa, **kwargs), 0, -1) + return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) result = apply_ufunc( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b8a9c5edaf9..33f1b403eb8 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2368,13 +2368,15 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + def test_quantile(self, q, axis, dim, skipna): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 44ffafb23b1..d2e8c6b7609 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4697,12 +4697,13 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q): + def test_quantile(self, q, skipna): ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) if is_scalar(q): assert "quantile" not in ds_quantile.dims else: @@ -4710,12 +4711,27 @@ def test_quantile(self, q): for var, dar in ds.data_vars.items(): assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + assert_identical( + ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna) + ) dim = ["dim1", "dim2"] - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) + @pytest.mark.parametrize("skipna", [True, False]) + def test_quantile_skipna(self, skipna): + q = 0.1 + dim = "time" + ds = Dataset({"a": ([dim], np.arange(0, 11))}) + ds = ds.where(ds >= 1) + + result = ds.quantile(q=q, dim=dim, skipna=skipna) + + value = 1.9 if skipna else np.nan + expected = Dataset({"a": value}, coords={"quantile": q}) + assert_identical(result, expected) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c86ecd0121f..c600f7a77d0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1511,14 +1511,16 @@ def test_reduce(self): with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): v.mean(dim="x", allow_lazy=False) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): + def test_quantile(self, q, axis, dim, skipna): v = 
Variable(["x", "y"], self.d) - actual = v.quantile(q, dim=dim) - expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + actual = v.quantile(q, dim=dim, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) @requires_dask From 603b0ad3f8a02a9e1180eb8dfc72f7f885f0e19a Mon Sep 17 00:00:00 2001 From: Mirko Panighel <30869713+mpanighel@users.noreply.github.com> Date: Sun, 8 Mar 2020 18:43:36 +0100 Subject: [PATCH 16/24] Add nxarray to related-projects.rst (#3848) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 3188751366f..edee80b72b8 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -61,6 +61,7 @@ Extend xarray capabilities - `Collocate `_: Collocate xarray trajectories in arbitrary physical dimensions - `eofs `_: EOF analysis in Python. - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. +- `nxarray `_: NeXus input/output capability for xarray. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. From 203c3f4ee1b4220b3fa3a073b5412fb7bd72525b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 9 Mar 2020 03:11:55 -0400 Subject: [PATCH 17/24] remove panel conversion (#3845) --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 10 ++-------- xarray/tests/test_dataarray.py | 8 ++------ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2c30db99bcd..ed94b84feea 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -95,6 +95,9 @@ Internal Changes By `Bruno Pagani `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ +- Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas + in favor of xarray's objects. + By `Maximilian Roos `_ .. _whats-new.0.15.0: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7fcb42bf9d2..7a95aedc2f7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2243,20 +2243,14 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: * 0D -> `xarray.DataArray` * 1D -> `pandas.Series` * 2D -> `pandas.DataFrame` - * 3D -> `pandas.Panel` *(deprecated)* - Only works for arrays with 3 or fewer dimensions. + Only works for arrays with 2 or fewer dimensions. The DataArray constructor performs the inverse transformation. """ # TODO: consolidate the info about pandas constructors and the # attributes that correspond to their indexes into a separate module? 
- constructors = { - 0: lambda x: x, - 1: pd.Series, - 2: pd.DataFrame, - 3: pdcompat.Panel, - } + constructors = {0: lambda x: x, 1: pd.Series, 2: pd.DataFrame} try: constructor = constructors[self.ndim] except KeyError: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 33f1b403eb8..dfaf8fd4e28 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3411,14 +3411,10 @@ def test_to_pandas(self): assert_array_equal(actual.columns, [0, 1]) # roundtrips - for shape in [(3,), (3, 4), (3, 4, 5)]: - if len(shape) > 2 and LooseVersion(pd.__version__) >= "0.25.0": - continue + for shape in [(3,), (3, 4)]: dims = list("abc")[: len(shape)] da = DataArray(np.random.randn(*shape), dims=dims) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - roundtripped = DataArray(da.to_pandas()).drop_vars(dims) + roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) with raises_regex(ValueError, "cannot convert"): From f4ebbfef8f317205fba9edecadaac843dfa131f7 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Mar 2020 08:18:06 +0100 Subject: [PATCH 18/24] un-xfail tests that append to netCDF files with scipy (#3805) * remove ScipyWriteBase class * add whats new Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/tests/test_backends.py | 24 ++++-------------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ed94b84feea..bc0e5092d5b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -95,6 +95,8 @@ Internal Changes By `Bruno Pagani `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ +- Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). + By `Mathias Hauser `_. - Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas in favor of xarray's objects. 
By `Maximilian Roos `_ diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 015d2cbfdeb..59ed8e690cc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1979,24 +1979,8 @@ def create_zarr_target(self): yield tmp -class ScipyWriteBase(CFEncodedBase, NetCDF3Only): - def test_append_write(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_write() - - def test_append_overwrite_values(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_overwrite_values() - - @requires_scipy -class TestScipyInMemoryData(ScipyWriteBase): +class TestScipyInMemoryData(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -2017,7 +2001,7 @@ def test_bytes_pickle(self): @requires_scipy -class TestScipyFileObject(ScipyWriteBase): +class TestScipyFileObject(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -2050,7 +2034,7 @@ def test_pickle_dataarray(self): @requires_scipy -class TestScipyFilePath(ScipyWriteBase): +class TestScipyFilePath(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -3317,7 +3301,7 @@ def test_session(self): @requires_scipy @requires_pynio -class TestPyNio(ScipyWriteBase): +class TestPyNio(CFEncodedBase, NetCDF3Only): def test_write_store(self): # pynio is read-only for now pass From 9f97c4384f6456a5582f2bf7277c90be110fce92 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 9 Mar 2020 08:40:45 +0100 Subject: [PATCH 19/24] Pint support for top-level functions (#3611) * get the align tests to pass * add pint to the upstream-dev ci job * special case for booleans * silence the pint behaviour change warning * preprocess the unit mapping parameter to convert_units * use assert_allclose and assert_identical instead * clean up a few tests * remove some xfails * use the unit registry's quantity class * explain the catch_warnings block * don't use the function wrapper class if we don't need arguments * whats-new.rst * require the new pint version * use functools.partial instead of function * remove the convert_from parameter of array_attach_units * make sure every top-level function test uses assert_units_equal * hide the traceback of the unit comparison function * considerably simplify the merge_dataarray test * simplify the merge_dataset test --- ci/requirements/py36-min-nep18.yml | 2 +- doc/whats-new.rst | 2 +- xarray/tests/test_units.py | 356 ++++++++++++++++------------- 3 files changed, 203 insertions(+), 157 deletions(-) diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index c10fdf67dc4..a5eded49cd4 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -11,7 +11,7 @@ dependencies: - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 - - pint=0.9 # Actually not enough as it doesn't implement __array_function__yet! + - pint=0.11 - pip - pytest - pytest-cov diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bc0e5092d5b..00c63b81260 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,7 +32,7 @@ New Features - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. -- implement pint support. 
(:issue:`3594`, :pull:`3706`) +- Support unit aware arrays with pint. (:issue:`3594`, :pull:`3706`, :pull:`3611`) By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 9f63ebb1d42..bef3af62d74 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1,3 +1,4 @@ +import functools import operator from distutils.version import LooseVersion @@ -8,6 +9,7 @@ import xarray as xr from xarray.core import formatting from xarray.core.npcompat import IS_NEP18_ACTIVE +from xarray.testing import assert_allclose, assert_identical from .test_variable import VariableSubclassobjects @@ -70,53 +72,17 @@ def array_strip_units(array): return array -def array_attach_units(data, unit, convert_from=None): - try: - unit, convert_from = unit - except TypeError: - pass - +def array_attach_units(data, unit): if isinstance(data, Quantity): - if not convert_from: - raise ValueError( - "cannot attach unit {unit} to quantity ({data.units})".format( - unit=unit, data=data - ) - ) - elif isinstance(convert_from, unit_registry.Unit): - data = data.magnitude - elif convert_from is True: # intentionally accept exactly true - if data.check(unit): - convert_from = data.units - data = data.magnitude - else: - raise ValueError( - "cannot convert quantity ({data.units}) to {unit}".format( - unit=unit, data=data - ) - ) - else: - raise ValueError( - "cannot convert from invalid unit {convert_from}".format( - convert_from=convert_from - ) - ) + raise ValueError(f"cannot attach unit {unit} to quantity {data}") - # to make sure we also encounter the case of "equal if converted" - if convert_from is not None: - quantity = (data * convert_from).to( - unit - if isinstance(unit, unit_registry.Unit) - else unit_registry.dimensionless - ) - else: - try: - quantity = data * unit - except np.core._exceptions.UFuncTypeError: - if unit != 1: - raise + try: + quantity = data * unit + except np.core._exceptions.UFuncTypeError: + if isinstance(unit, unit_registry.Unit): + raise - quantity = data + quantity = data return quantity @@ -241,6 +207,11 @@ def attach_units(obj, units): def convert_units(obj, to): + # preprocess + to = { + key: None if not isinstance(value, unit_registry.Unit) else value + for key, value in to.items() + } if isinstance(obj, xr.Dataset): data_vars = { name: convert_units(array.variable, {None: to.get(name)}) @@ -282,6 +253,7 @@ def convert_units(obj, to): def assert_units_equal(a, b): + __tracebackhide__ = True assert extract_units(a) == extract_units(b) @@ -414,9 +386,8 @@ def __repr__(self): return f"function_{self.name}" -@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way") def test_apply_ufunc_dataarray(dtype): - func = function( + func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) @@ -427,12 +398,12 @@ def test_apply_ufunc_dataarray(dtype): expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way") def test_apply_ufunc_dataset(dtype): - func = function( + func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) @@ -450,10 +421,10 @@ def 
test_apply_ufunc_dataset(dtype): expected = attach_units(func(strip_units(ds)), extract_units(ds)) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -475,36 +446,40 @@ def test_apply_ufunc_dataset(dtype): "coords", ), ) -@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +@pytest.mark.parametrize("fill_value", (10, np.nan)) def test_align_dataarray(fill_value, variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, 1, 1), - "dims": (original_unit, unit, 1), - "coords": (original_unit, 1, unit), + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), } data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit x = np.arange(2) * original_unit - x_a1 = np.array([10, 5]) * original_unit - x_a2 = np.array([10, 5]) * coord_unit y1 = np.arange(5) * original_unit y2 = np.arange(2, 7) * dim_unit + y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit + y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit - data_array1 = xr.DataArray( - data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") - ) - data_array2 = xr.DataArray( - data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") - ) + coords1 = {"x": x, "y": y1} + coords2 = {"x": x, "y": y2} + if variant == "coords": + coords1["y_a"] = ("y", y_a1) + coords2["y_a"] = ("y", y_a2) + + data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y")) + data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y")) fill_value = fill_value * data_unit func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): func(data_array1, data_array2) @@ -524,15 +499,19 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): **stripped_kwargs, ) expected_a = attach_units(expected_a, units_a) - expected_b = convert_units(attach_units(expected_b, units_a), units_b) + if isinstance(array2, Quantity): + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + else: + expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(data_array1, data_array2) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_allclose(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_allclose(expected_b, actual_b) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -558,31 +537,37 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): def test_align_dataset(fill_value, unit, variant, error, dtype): original_unit = unit_registry.m - variants = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)} + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 10, 2 * 5).reshape(2, 
5).astype(dtype) * original_unit array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit x = np.arange(2) * original_unit - x_a1 = np.array([10, 5]) * original_unit - x_a2 = np.array([10, 5]) * coord_unit y1 = np.arange(5) * original_unit y2 = np.arange(2, 7) * dim_unit + y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit + y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit - ds1 = xr.Dataset( - data_vars={"a": (("x", "y"), array1)}, - coords={"x": x, "x_a": ("x", x_a1), "y": y1}, - ) - ds2 = xr.Dataset( - data_vars={"a": (("x", "y"), array2)}, - coords={"x": x, "x_a": ("x", x_a2), "y": y2}, - ) + coords1 = {"x": x, "y": y1} + coords2 = {"x": x, "y": y2} + if variant == "coords": + coords1["y_a"] = ("y", y_a1) + coords2["y_a"] = ("y", y_a2) + + ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1) + ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2) fill_value = fill_value * data_unit func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): func(ds1, ds2) @@ -600,12 +585,17 @@ def test_align_dataset(fill_value, unit, variant, error, dtype): strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs ) expected_a = attach_units(expected_a, units_a) - expected_b = convert_units(attach_units(expected_b, units_a), units_b) + if isinstance(array2, Quantity): + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + else: + expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(ds1, ds2) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_allclose(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_allclose(expected_b, actual_b) def test_broadcast_dataarray(dtype): @@ -615,28 +605,53 @@ def test_broadcast_dataarray(dtype): a = xr.DataArray(data=array1, dims="x") b = xr.DataArray(data=array2, dims="y") - expected_a, expected_b = tuple( - attach_units(elem, extract_units(a)) - for elem in xr.broadcast(strip_units(a), strip_units(b)) - ) + units_a = extract_units(a) + units_b = extract_units(b) + expected_a, expected_b = xr.broadcast(strip_units(a), strip_units(b)) + expected_a = attach_units(expected_a, units_a) + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + actual_a, actual_b = xr.broadcast(a, b) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_identical(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_identical(expected_b, actual_b) def test_broadcast_dataset(dtype): array1 = np.linspace(0, 10, 2) * unit_registry.Pa array2 = np.linspace(0, 10, 3) * unit_registry.Pa - ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + x1 = np.arange(2) + y1 = np.arange(3) + + x2 = np.arange(2, 4) + y2 = np.arange(3, 6) - (expected,) = tuple( - attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, coords={"x": x1, "y": y1} + ) + other = xr.Dataset( + data_vars={ + "a": ("x", array1.to(unit_registry.hPa)), + "b": ("y", array2.to(unit_registry.hPa)), + }, + coords={"x": x2, "y": y2}, ) - (actual,) = xr.broadcast(ds) - assert_equal_with_units(expected, actual) + units_a = 
extract_units(ds) + units_b = extract_units(other) + expected_a, expected_b = xr.broadcast(strip_units(ds), strip_units(other)) + expected_a = attach_units(expected_a, units_a) + expected_b = attach_units(expected_b, units_b) + + actual_a, actual_b = xr.broadcast(ds, other) + + assert_units_equal(expected_a, actual_a) + assert_identical(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_identical(expected_b, actual_b) @pytest.mark.parametrize( @@ -706,7 +721,8 @@ def test_combine_by_coords(variant, unit, error, dtype): ) actual = xr.combine_by_coords([ds, other]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -717,12 +733,7 @@ def test_combine_by_coords(variant, unit, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.mm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="wrong order of arguments to `where`"), - ), + pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, @@ -810,7 +821,8 @@ def test_combine_nested(variant, unit, error, dtype): ) actual = func([[ds1, ds2], [ds3, ds4]]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -862,7 +874,8 @@ def test_concat_dataarray(variant, unit, error, dtype): ) actual = xr.concat([arr1, arr2], dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -912,10 +925,10 @@ def test_concat_dataset(variant, unit, error, dtype): ) actual = xr.concat([ds1, ds2], dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -948,64 +961,81 @@ def test_merge_dataarray(variant, unit, error, dtype): data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + x1 = np.arange(2) * original_unit + y1 = np.arange(3) * original_unit + u1 = np.linspace(10, 20, 2) * original_unit + v1 = np.linspace(10, 20, 3) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit - array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + x2 = np.arange(2, 4) * dim_unit + z2 = np.arange(4) * original_unit + u2 = np.linspace(20, 30, 2) * coord_unit + w2 = np.linspace(10, 20, 4) * original_unit - x = np.arange(2) * original_unit - y = np.arange(3) * original_unit - z = np.arange(4) * original_unit - u = np.linspace(10, 20, 2) * original_unit - v = np.linspace(10, 20, 3) * original_unit - w = np.linspace(10, 20, 4) * original_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + y3 = np.arange(3, 6) * dim_unit + z3 = np.arange(4, 8) * dim_unit + v3 = np.linspace(10, 20, 3) * coord_unit + w3 = np.linspace(10, 20, 4) * coord_unit arr1 = xr.DataArray( name="a", data=array1, - coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + coords={"x": x1, "y": y1, "u": ("x", u1), "v": ("y", v1)}, dims=("x", "y"), ) arr2 = xr.DataArray( - name="b", + name="a", data=array2, - coords={ - "x": np.arange(2, 4) * dim_unit, 
- "z": z, - "u": ("x", np.linspace(20, 30, 2) * coord_unit), - "w": ("z", w), - }, + coords={"x": x2, "z": z2, "u": ("x", u2), "w": ("z", w2)}, dims=("x", "z"), ) arr3 = xr.DataArray( - name="c", + name="a", data=array3, - coords={ - "y": np.arange(3, 6) * dim_unit, - "z": np.arange(4, 8) * dim_unit, - "v": ("y", np.linspace(10, 20, 3) * coord_unit), - "w": ("z", np.linspace(10, 20, 4) * coord_unit), - }, + coords={"y": y3, "z": z3, "v": ("y", v3), "w": ("z", w3)}, dims=("y", "z"), ) - func = function(xr.merge) if error is not None: with pytest.raises(error): - func([arr1, arr2, arr3]) + xr.merge([arr1, arr2, arr3]) return - units = {name: original_unit for name in list("abcuvwxyz")} + units = {name: original_unit for name in list("axyzuvw")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) - expected = attach_units( - func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), - units, + expected_units = { + "a": original_unit, + "u": original_unit, + "v": original_unit, + "w": original_unit, + "x": original_unit, + "y": original_unit, + "z": original_unit, + } + + expected = convert_units( + attach_units( + xr.merge( + [ + convert_and_strip(arr1), + convert_and_strip(arr2), + convert_and_strip(arr3), + ] + ), + units, + ), + expected_units, ) - actual = func([arr1, arr2, arr3]) - assert_equal_with_units(expected, actual) + actual = xr.merge([arr1, arr2, arr3]) + + assert_units_equal(expected, actual) + assert_allclose(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -1046,7 +1076,7 @@ def test_merge_dataset(variant, unit, error, dtype): ds1 = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, - coords={"x": x, "y": y, "z": ("x", z)}, + coords={"x": x, "y": y, "u": ("x", z)}, ) ds2 = xr.Dataset( data_vars={ @@ -1056,18 +1086,18 @@ def test_merge_dataset(variant, unit, error, dtype): coords={ "x": np.arange(3) * dim_unit, "y": np.arange(2, 4) * dim_unit, - "z": ("x", np.arange(-3, 0) * coord_unit), + "u": ("x", np.arange(-3, 0) * coord_unit), }, ) ds3 = xr.Dataset( data_vars={ - "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), - "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit), + "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit), }, coords={ "x": np.arange(3, 6) * dim_unit, "y": np.arange(4, 6) * dim_unit, - "z": ("x", np.arange(3, 6) * coord_unit), + "u": ("x", np.arange(3, 6) * coord_unit), }, ) @@ -1080,12 +1110,20 @@ def test_merge_dataset(variant, unit, error, dtype): units = extract_units(ds1) convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) - expected = attach_units( - func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + expected_units = {name: original_unit for name in list("abxyzu")} + expected = convert_units( + attach_units( + func( + [convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)] + ), + units, + ), + expected_units, ) actual = func([ds1, ds2, ds3]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_allclose(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -1094,10 +1132,12 @@ def test_replication_dataarray(func, dtype): data_array = xr.DataArray(data=array, dims="x") numpy_func = getattr(np, func.__name__) - expected = xr.DataArray(data=numpy_func(array), dims="x") + units = 
extract_units(numpy_func(data_array)) + expected = attach_units(func(data_array), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -1114,12 +1154,13 @@ def test_replication_dataset(func, dtype): ) numpy_func = getattr(np, func.__name__) - expected = ds.copy( - data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} - ) + units = extract_units(ds.map(numpy_func)) + expected = attach_units(func(strip_units(ds)), units) + actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail( @@ -1158,7 +1199,8 @@ def test_replication_full_like_dataarray(unit, error, dtype): ) actual = xr.full_like(data_array, fill_value=fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail( @@ -1208,7 +1250,8 @@ def test_replication_full_like_dataset(unit, error, dtype): ) actual = xr.full_like(ds, fill_value=fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -1250,7 +1293,8 @@ def test_where_dataarray(fill_value, unit, error, dtype): ) actual = xr.where(cond, x, fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -1294,7 +1338,8 @@ def test_where_dataset(fill_value, unit, error, dtype): ) actual = xr.where(cond, ds, fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) def test_dot_dataarray(dtype): @@ -1315,7 +1360,8 @@ def test_dot_dataarray(dtype): ) actual = xr.dot(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) def delete_attrs(*to_delete): From 1db010bb1f84c63c45c1317a78e89362587e1423 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Mar 2020 15:07:02 +0100 Subject: [PATCH 20/24] update installation instruction (#3849) * installing.rst: update instructions * whats-new * explicit link and anchor * :doc: -> :ref: --- doc/installing.rst | 15 ++++++++------- doc/whats-new.rst | 3 +++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/doc/installing.rst b/doc/installing.rst index dfc2841a956..a25bf65e342 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -11,6 +11,8 @@ Required dependencies - `numpy `__ (1.15 or later) - `pandas `__ (0.25 or later) +.. _optional-dependencies: + Optional dependencies --------------------- @@ -24,7 +26,7 @@ For netCDF and IO - `h5netcdf `__: an alternative library for reading and writing netCDF4 files that does not use the netCDF-C libraries - `pynio `__: for reading GRIB and other - geoscience specific file formats + geoscience specific file formats. Note that pynio is not available for Windows. - `zarr `__: for chunked, compressed, N-dimensional arrays. - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before @@ -121,16 +123,15 @@ xarray itself is a pure Python package, but its dependencies are not. The easiest way to get everything installed is to use conda_. 
To install xarray with its recommended dependencies using the conda command line tool:: - $ conda install xarray dask netCDF4 bottleneck + $ conda install -c conda-forge xarray dask netCDF4 bottleneck .. _conda: http://conda.io/ -We recommend using the community maintained `conda-forge `__ channel if you need difficult\-to\-build dependencies such as cartopy, pynio or PseudoNetCDF:: - - $ conda install -c conda-forge xarray cartopy pynio pseudonetcdf +If you require other :ref:`optional-dependencies` add them to the line above. -New releases may also appear in conda-forge before being updated in the default -channel. +We recommend using the community maintained `conda-forge `__ channel, +as some of the dependencies are difficult to build. New releases may also appear in conda-forge before +being updated in the default channel. If you don't use conda, be sure you have the required dependencies (numpy and pandas) installed first. Then, install xarray with pip:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 00c63b81260..3f04ba4ec57 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -81,6 +81,9 @@ Documentation - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) By `Sander van Rijn `_. +- Update the installation instructions: only explicitly list recommended dependencies + (:issue:`3756`). + By `Mathias Hauser `_. Internal Changes ~~~~~~~~~~~~~~~~ From 7927c2b79e4dd7ecebb648e8e64e2647405b08db Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 9 Mar 2020 23:06:07 -0700 Subject: [PATCH 21/24] add xpublish to related projects (#3850) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index edee80b72b8..57b8da0c447 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -63,6 +63,7 @@ Extend xarray capabilities - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. - `nxarray `_: NeXus input/output capability for xarray. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). +- `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. - `X-regression `_: Multiple linear regression from Statsmodels library coupled with Xarray library. 
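
Taken together, patches 14 and 15 above add two small user-facing conveniences: ``where`` accepting a callable and ``skipna`` in ``quantile``. A minimal sketch of both, assuming an environment built from this branch (the data values are illustrative)::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(5.0))})

    # Patch 14: `cond` may be a callable, which is evaluated on the object
    # itself before masking, mirroring the pandas idiom.
    masked = ds.where(lambda d: d > 1)       # a: [nan, nan, 2.0, 3.0, 4.0]
    filled = ds.where(lambda d: d > 1, -1)   # a: [-1.0, -1.0, 2.0, 3.0, 4.0]

    # Patch 15: `skipna` selects between np.nanquantile (the default) and
    # np.quantile, so NaNs are either ignored or propagate into the result.
    masked.quantile(0.1, dim="x")                # a = 2.2
    masked.quantile(0.1, dim="x", skipna=False)  # a = nan

Accepting a callable keeps method chains readable when the intermediate object is never bound to a name, which is the same motivation as in pandas.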
From 739b34767ddd19b6168af05ee749b527266c104d Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 10 Mar 2020 10:02:59 -0400 Subject: [PATCH 22/24] Doctests fixes (#3846) * start of doctest fixes * start of doctest fixes --- conftest.py | 11 +++++++++++ doc/contributing.rst | 8 ++++---- xarray/core/dataarray.py | 19 ++++++++++++------- xarray/core/dataset.py | 11 ++++++++--- xarray/core/rolling.py | 15 ++++++++------- 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/conftest.py b/conftest.py index 25dc284975e..712af1d3759 100644 --- a/conftest.py +++ b/conftest.py @@ -21,3 +21,14 @@ def pytest_runtest_setup(item): pytest.skip( "set --run-network-tests to run test requiring an " "internet connection" ) + + +@pytest.fixture(autouse=True) +def add_standard_imports(doctest_namespace): + import numpy as np + import pandas as pd + import xarray as xr + + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + doctest_namespace["xr"] = xr diff --git a/doc/contributing.rst b/doc/contributing.rst index eb31db24591..f581bcd9741 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -51,8 +51,8 @@ Bug reports must: `_:: ```python - >>> from xarray import Dataset - >>> df = Dataset(...) + >>> import xarray as xr + >>> df = xr.Dataset(...) ... ``` @@ -378,8 +378,8 @@ and then running:: pre-commit install -from the root of the xarray repository. You can skip the pre-commit checks with -``git commit --no-verify``. +from the root of the xarray repository. You can skip the pre-commit checks +with ``git commit --no-verify``. Backwards Compatibility diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7a95aedc2f7..6782070da0b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1475,20 +1475,23 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Examples -------- + >>> arr = xr.DataArray(data=[0, 1], dims="x", - coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) >>> arr array([0, 1]) Coordinates: * x (x) >> arr.swap_dims({"x": "y"}) array([0, 1]) Coordinates: x (y) >> arr.swap_dims({"x": "z"}) array([0, 1]) @@ -1718,7 +1721,7 @@ def stack( Examples -------- - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> arr @@ -1768,7 +1771,7 @@ def unstack( Examples -------- - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> arr @@ -1817,7 +1820,7 @@ def to_unstacked_dataset(self, dim, level=0): Examples -------- >>> import xarray as xr - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... 
coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> data = xr.Dataset({'a': arr, 'b': arr.isel(y=0)}) >>> data @@ -2623,7 +2626,7 @@ def plot(self) -> _PlotMethods: """ Access plotting functions for DataArray's - >>> d = DataArray([[1, 2], [3, 4]]) + >>> d = xr.DataArray([[1, 2], [3, 4]]) For convenience just call this directly @@ -2849,18 +2852,20 @@ def dot( -------- >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) - >>> da = DataArray(da_vals, dims=['x', 'y', 'z']) + >>> da = xr.DataArray(da_vals, dims=['x', 'y', 'z']) >>> dm_vals = np.arange(4) - >>> dm = DataArray(dm_vals, dims=['z']) + >>> dm = xr.DataArray(dm_vals, dims=['z']) >>> dm.dims ('z') + >>> da.dims ('x', 'y', 'z') >>> dot_result = da.dot(dm) >>> dot_result.dims ('x', 'y') + """ if isinstance(other, Dataset): raise NotImplementedError( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f286236dd45..a4d20a79b7c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1011,7 +1011,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": >>> da = xr.DataArray(np.random.randn(2, 3)) >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}, - coords={'x': ['one', 'two']}) + ... coords={'x': ['one', 'two']}) >>> ds.copy() Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1021,6 +1021,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) float64 -0.8079 0.3897 -1.862 -0.6091 -1.051 -0.3003 bar (x) int64 -1 2 + >>> ds_0 = ds.copy(deep=False) >>> ds_0['foo'][0, 0] = 7 >>> ds_0 @@ -1032,6 +1033,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) float64 7.0 0.3897 -1.862 -0.6091 -1.051 -0.3003 bar (x) int64 -1 2 + >>> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1055,6 +1057,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) int64 0 1 2 3 4 5 bar (x) >> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -2883,7 +2886,7 @@ def swap_dims( Examples -------- >>> ds = xr.Dataset(data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, - coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) >>> ds Dimensions: (x: 2) @@ -2893,6 +2896,7 @@ def swap_dims( Data variables: a (x) int64 5 7 b (x) float64 0.1 2.4 + >>> ds.swap_dims({"x": "y"}) Dimensions: (y: 2) @@ -2902,6 +2906,7 @@ def swap_dims( Data variables: a (y) int64 5 7 b (y) float64 0.1 2.4 + >>> ds.swap_dims({"x": "z"}) Dimensions: (z: 2) @@ -3341,7 +3346,7 @@ def to_stacked_array( Examples -------- - >>> data = Dataset( + >>> data = xr.Dataset( ... data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), ... 'b': ('x', [6, 7])}, ... 
coords={'y': ['u', 'v', 'w']} diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 61178cfb15f..5f633abbde6 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -231,21 +231,22 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): Examples -------- - >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) - >>> + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) + >>> rolling = da.rolling(b=3) >>> rolling.construct('window_dim') array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim - >>> + >>> rolling = da.rolling(b=3, center=True) >>> rolling.construct('window_dim') array([[[np.nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, np.nan]], [[np.nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, np.nan]]]) Dimensions without coordinates: a, b, window_dim + """ from .dataarray import DataArray @@ -278,26 +279,26 @@ def reduce(self, func, **kwargs): Examples -------- - >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) - >>> + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) >>> rolling = da.rolling(b=3) >>> rolling.construct('window_dim') array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim - >>> + >>> rolling.reduce(np.sum) array([[nan, nan, 3., 6.], [nan, nan, 15., 18.]]) Dimensions without coordinates: a, b - >>> + >>> rolling = da.rolling(b=3, min_periods=1) >>> rolling.reduce(np.nansum) array([[ 0., 1., 3., 6.], [ 4., 9., 15., 18.]]) + """ rolling_dim = utils.get_temp_dimname(self.obj.dims, "_rolling_dim") windows = self.construct(rolling_dim) From 650a981734ce3291f5aaa68648ebde451339f28a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 13 Mar 2020 02:14:41 -0400 Subject: [PATCH 23/24] Fix CFTimeIndex-related errors stemming from updates in pandas (#3764) * Allow subtraction of a generic Index of cftime.datetimes from a CFTimeIndex * black * Test that NotImplemented logic works * Vendor _get_nearest_indexer and _filter_indexer_tolerance * Test OverflowError in __rsub__ * Fix name of pandas method in docstring * Add what's new entries * Enable use of tolerance greater than 292 years * newlinw Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 6 +++ xarray/coding/cftimeindex.py | 54 +++++++++++++++++++++++---- xarray/tests/test_cftimeindex.py | 63 +++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3f04ba4ec57..80309dc4673 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -96,6 +96,12 @@ Internal Changes - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. +- Preserved the ability to index with ``method="nearest"`` with a + :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1 + (:issue:`3751`). By `Spencer Clark `_. +- Greater flexibility and improved test coverage of subtracting various types + of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark + `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ - Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). 
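
Concretely, the behaviour preserved by the two entries above looks like this minimal sketch (it requires cftime; the dates and tolerance are illustrative)::

    from datetime import timedelta

    import cftime
    import numpy as np
    import xarray as xr

    times = xr.cftime_range("2000-01-01", periods=4, freq="D", calendar="noleap")
    da = xr.DataArray(np.arange(4), coords=[times], dims="time")

    # Nearest-neighbour selection on a CFTimeIndex keeps working on newer
    # pandas because the indexer logic is vendored in the diff below.
    da.sel(time=cftime.DatetimeNoLeap(2000, 1, 2, 10), method="nearest")

    # Tolerances beyond the ~292-year span of timedelta64[ns] are accepted,
    # since distances are computed on datetime.timedelta objects.
    da.sel(
        time=cftime.DatetimeNoLeap(2500, 1, 1),
        method="nearest",
        tolerance=timedelta(days=1800000),
    )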
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 99f90430e91..1ea5d3a7d11 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -49,6 +49,7 @@ from xarray.core.utils import is_scalar +from ..core.common import _contains_cftime_datetimes from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name @@ -326,6 +327,32 @@ def _get_string_slice(self, key): raise KeyError(key) return loc + def _get_nearest_indexer(self, target, limit, tolerance): + """Adapted from pandas.Index._get_nearest_indexer""" + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + left_distances = abs(self.values[left_indexer] - target.values) + right_distances = abs(self.values[right_indexer] - target.values) + + if self.is_monotonic_increasing: + condition = (left_distances < right_distances) | (right_indexer == -1) + else: + condition = (left_distances <= right_distances) | (right_indexer == -1) + indexer = np.where(condition, left_indexer, right_indexer) + + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + def _filter_indexer_tolerance(self, target, indexer, tolerance): + """Adapted from pandas.Index._filter_indexer_tolerance""" + if isinstance(target, pd.Index): + distance = abs(self.values[indexer] - target.values) + else: + distance = abs(self.values[indexer] - target) + indexer = np.where(distance <= tolerance, indexer, -1) + return indexer + def get_loc(self, key, method=None, tolerance=None): """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" if isinstance(key, str): @@ -427,9 +454,11 @@ def __radd__(self, other): return CFTimeIndex(other + np.array(self)) def __sub__(self, other): - import cftime - - if isinstance(other, (CFTimeIndex, cftime.datetime)): + if _contains_datetime_timedeltas(other): + return CFTimeIndex(np.array(self) - other) + elif isinstance(other, pd.TimedeltaIndex): + return CFTimeIndex(np.array(self) - other.to_pytimedelta()) + elif _contains_cftime_datetimes(np.array(other)): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) except OverflowError: @@ -437,14 +466,17 @@ def __sub__(self, other): "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." ) - - elif isinstance(other, pd.TimedeltaIndex): - return CFTimeIndex(np.array(self) - other.to_pytimedelta()) else: - return CFTimeIndex(np.array(self) - other) + return NotImplemented def __rsub__(self, other): - return pd.TimedeltaIndex(other - np.array(self)) + try: + return pd.TimedeltaIndex(other - np.array(self)) + except OverflowError: + raise ValueError( + "The time difference exceeds the range of values " + "that can be expressed at the nanosecond resolution." + ) def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. @@ -633,6 +665,12 @@ def _parse_array_of_cftime_strings(strings, date_type): ).reshape(strings.shape) +def _contains_datetime_timedeltas(array): + """Check if an input array contains datetime.timedelta objects.""" + array = np.atleast_1d(array) + return isinstance(array[0], timedelta) + + def _cftimeindex_from_i8(values, date_type, name): """Construct a CFTimeIndex from an array of integers. 
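
The subtraction rules implemented in the diff above reduce to three cases, sketched here (requires cftime; the dates are illustrative)::

    from datetime import timedelta

    import pandas as pd
    import xarray as xr

    index = xr.cftime_range("2000-01-01", periods=3, calendar="standard")

    # Datetime-like operands produce a pd.TimedeltaIndex; this now covers
    # a plain pd.Index of cftime.datetime objects, not just a CFTimeIndex.
    index - pd.Index(index.values)

    # Timedelta-like operands (scalars, arrays, or a pd.TimedeltaIndex)
    # shift the dates and return a new CFTimeIndex.
    index - timedelta(days=1)

    # Anything else falls through to NotImplemented, so Python raises a
    # TypeError instead of the index guessing at semantics.
    try:
        index - 1
    except TypeError:
        pass

Returning NotImplemented rather than raising directly leaves the door open for the other operand's reflected method, which is the standard Python protocol for binary operators.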
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8d83b833ca3..43d6d7b068e 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -451,10 +451,21 @@ def test_sel_date_scalar(da, date_type, index): @pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") +@requires_cftime +def test_sel_date_distant_date(da, date_type, index): + expected = xr.DataArray(4).assign_coords(time=index[3]) + result = da.sel(time=date_type(2000, 1, 1), method="nearest") + assert_identical(result, expected) + + @requires_cftime @pytest.mark.parametrize( "sel_kwargs", - [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}], + [ + {"method": "nearest"}, + {"method": "nearest", "tolerance": timedelta(days=70)}, + {"method": "nearest", "tolerance": timedelta(days=1800000)}, + ], ) def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): expected = xr.DataArray(2).assign_coords(time=index[1]) @@ -738,7 +749,7 @@ def test_timedeltaindex_add_cftimeindex(calendar): @requires_cftime -def test_cftimeindex_sub(index): +def test_cftimeindex_sub_timedelta(index): date_type = index.date_type expected_dates = [ date_type(1, 1, 2), @@ -753,6 +764,27 @@ def test_cftimeindex_sub(index): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize( + "other", + [np.array(4 * [timedelta(days=1)]), np.array(timedelta(days=1))], + ids=["1d-array", "scalar-array"], +) +def test_cftimeindex_sub_timedelta_array(index, other): + date_type = index.date_type + expected_dates = [ + date_type(1, 1, 2), + date_type(1, 2, 2), + date_type(2, 1, 2), + date_type(2, 2, 2), + ] + expected = CFTimeIndex(expected_dates) + result = index + timedelta(days=2) + result = result - other + assert result.equals(expected) + assert isinstance(result, CFTimeIndex) + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_cftimeindex(calendar): @@ -784,6 +816,14 @@ def test_cftime_datetime_sub_cftimeindex(calendar): assert isinstance(result, pd.TimedeltaIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_distant_cftime_datetime_sub_cftimeindex(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(ValueError, match="difference exceeds"): + a.date_type(1, 1, 1) - a + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_timedeltaindex(calendar): @@ -795,6 +835,25 @@ def test_cftimeindex_sub_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_index_of_cftime_datetimes(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + b = pd.Index(a.values) + expected = a - a + result = a - b + assert result.equals(expected) + assert isinstance(result, pd.TimedeltaIndex) + + +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_not_implemented(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(TypeError, match="unsupported operand"): + a - 1 + + @requires_cftime def test_cftimeindex_rsub(index): with pytest.raises(TypeError): From 7f4f027e69b42ae1eb93fce2df708d65c70c0a10 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Mar 2020 13:25:12 +0000 Subject: [PATCH 24/24] Fix alignment with join="override" when some dims are unindexed (#3839) --- 
doc/whats-new.rst | 2 ++
 xarray/core/alignment.py | 2 +-
 xarray/tests/test_concat.py | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 80309dc4673..34d4342b028 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -55,6 +55,8 @@ New Features

 Bug fixes
 ~~~~~~~~~
+- Fix alignment with ``join="override"`` when some dimensions are unindexed (:issue:`3681`).
+  By `Deepak Cherian `_.
 - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing
   index with name reflecting the previous dimension name instead of the new one
   (:issue:`3748`, :pull:`3752`). By `Joseph K Aicher
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index 908119f7995..a83b1b87aa4 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -50,7 +50,7 @@ def _override_indexes(objects, all_indexes, exclude):
     objects = list(objects)
     for idx, obj in enumerate(objects[1:]):
         new_indexes = {}
-        for dim in obj.dims:
+        for dim in obj.indexes:
             if dim not in exclude:
                 new_indexes[dim] = all_indexes[dim][0]
         objects[idx + 1] = obj._overwrite_indexes(new_indexes)
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index bd99181a947..77c030198ac 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -250,6 +250,13 @@ def test_concat_join_kwarg(self):
             actual = concat([ds1, ds2], join=join, dim="x")
             assert_equal(actual, expected[join])

+        # regression test for #3681
+        actual = concat([ds1.drop("x"), ds2.drop("x")], join="override", dim="y")
+        expected = Dataset(
+            {"a": (("x", "y"), np.array([0, 0], ndmin=2))}, coords={"y": [0, 0.0001]}
+        )
+        assert_identical(actual, expected)
+
     def test_concat_promote_shape(self):
         # mixed dims within variables
         objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})]
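
For reference, the regression test above reduces to this standalone sketch of the fixed behaviour (the values mirror the test)::

    import numpy as np
    import xarray as xr

    ds1 = xr.Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]})
    ds2 = xr.Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [1], "y": [0.0001]})

    # Dropping "x" leaves that dimension without an index; iterating over
    # obj.indexes instead of obj.dims lets join="override" skip it rather
    # than fail on a missing key.
    out = xr.concat([ds1.drop("x"), ds2.drop("x")], dim="y", join="override")

    # out is identical to:
    # Dataset({"a": (("x", "y"), [[0, 0]])}, coords={"y": [0, 0.0001]})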