diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index df5b2304bc3..83c3aea53a8 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -25,8 +25,9 @@ assignees: ''
-#### Output of ``xr.show_versions()``
-<details>
+#### Versions
+
+<details><summary>Output of `xr.show_versions()`</summary>
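The template change above collapses the environment report behind a ``<details>`` tag; reporters generate that report with ``xr.show_versions()``, e.g.::

    import xarray as xr

    # prints the Python, xarray, and dependency versions to paste into the issue
    xr.show_versions()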
diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5789161c966..ce95fca1ba1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -38,7 +38,7 @@ jobs: py38: conda_env: py38 pool: - vmImage: 'macOS-10.13' + vmImage: 'macOS-10.15' steps: - template: ci/azure/unit-tests.yml diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 2c44e754cc4..2987303c92a 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -6,21 +6,22 @@ dependencies: - python=3.8 - bottleneck - cartopy - - cfgrib - - h5netcdf + - cfgrib>=0.9 + - dask>=2.10 + - h5netcdf>=0.7.4 - ipykernel - ipython - - iris + - iris>=2.3 - jupyter_client - nbsphinx - - netcdf4 + - netcdf4>=1.5 - numba - - numpy + - numpy>=1.17 - numpydoc - - pandas - - rasterio + - pandas>=1.0 + - rasterio>=1.1 - seaborn - setuptools - - sphinx - - sphinx_rtd_theme - - zarr + - sphinx>=2.3 + - sphinx_rtd_theme>=0.4 + - zarr>=2.4 \ No newline at end of file diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index c10fdf67dc4..a5eded49cd4 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -11,7 +11,7 @@ dependencies: - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 - - pint=0.9 # Actually not enough as it doesn't implement __array_function__yet! + - pint=0.11 - pip - pytest - pytest-cov diff --git a/conftest.py b/conftest.py index 25dc284975e..712af1d3759 100644 --- a/conftest.py +++ b/conftest.py @@ -21,3 +21,14 @@ def pytest_runtest_setup(item): pytest.skip( "set --run-network-tests to run test requiring an " "internet connection" ) + + +@pytest.fixture(autouse=True) +def add_standard_imports(doctest_namespace): + import numpy as np + import pandas as pd + import xarray as xr + + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + doctest_namespace["xr"] = xr diff --git a/doc/contributing.rst b/doc/contributing.rst index eb31db24591..f581bcd9741 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -51,8 +51,8 @@ Bug reports must: `_:: ```python - >>> from xarray import Dataset - >>> df = Dataset(...) + >>> import xarray as xr + >>> df = xr.Dataset(...) ... ``` @@ -378,8 +378,8 @@ and then running:: pre-commit install -from the root of the xarray repository. You can skip the pre-commit checks with -``git commit --no-verify``. +from the root of the xarray repository. You can skip the pre-commit checks +with ``git commit --no-verify``. Backwards Compatibility diff --git a/doc/examples.rst b/doc/examples.rst index 3067ca824be..805395808e0 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -17,3 +17,12 @@ Using apply_ufunc :maxdepth: 2 examples/apply_ufunc_vectorize_1d + +External Examples +----------------- +.. toctree:: + :maxdepth: 2 + + Managing raster data with rioxarray + Xarray with dask + Xarray and dask on the cloud with Pangeo diff --git a/doc/installing.rst b/doc/installing.rst index dfc2841a956..a25bf65e342 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -11,6 +11,8 @@ Required dependencies - `numpy `__ (1.15 or later) - `pandas `__ (0.25 or later) +.. _optional-dependencies: + Optional dependencies --------------------- @@ -24,7 +26,7 @@ For netCDF and IO - `h5netcdf `__: an alternative library for reading and writing netCDF4 files that does not use the netCDF-C libraries - `pynio `__: for reading GRIB and other - geoscience specific file formats + geoscience specific file formats. Note that pynio is not available for Windows. 
- `zarr `__: for chunked, compressed, N-dimensional arrays. - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before @@ -121,16 +123,15 @@ xarray itself is a pure Python package, but its dependencies are not. The easiest way to get everything installed is to use conda_. To install xarray with its recommended dependencies using the conda command line tool:: - $ conda install xarray dask netCDF4 bottleneck + $ conda install -c conda-forge xarray dask netCDF4 bottleneck .. _conda: http://conda.io/ -We recommend using the community maintained `conda-forge `__ channel if you need difficult\-to\-build dependencies such as cartopy, pynio or PseudoNetCDF:: - - $ conda install -c conda-forge xarray cartopy pynio pseudonetcdf +If you require other :ref:`optional-dependencies` add them to the line above. -New releases may also appear in conda-forge before being updated in the default -channel. +We recommend using the community maintained `conda-forge `__ channel, +as some of the dependencies are difficult to build. New releases may also appear in conda-forge before +being updated in the default channel. If you don't use conda, be sure you have the required dependencies (numpy and pandas) installed first. Then, install xarray with pip:: diff --git a/doc/io.rst b/doc/io.rst index e910943236f..6064aa3568a 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -759,9 +759,53 @@ for an example of how to convert these to longitudes and latitudes. considered as being experimental. Please report any bug you may find on xarray's github repository. + +Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other +GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF. +`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. + +.. ipython:: + :verbatim: + + In [1]: import rioxarray + + In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + + In [3]: rds + Out[3]: + + [1703814 values with dtype=uint8] + Coordinates: + * band (band) int64 1 2 3 + * y (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06 + * x (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05 + spatial_ref int64 0 + Attributes: + STATISTICS_MAXIMUM: 255 + STATISTICS_MEAN: 29.947726688477 + STATISTICS_MINIMUM: 0 + STATISTICS_STDDEV: 52.340921626611 + transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827... + _FillValue: 0.0 + scale_factor: 1.0 + add_offset: 0.0 + grid_mapping: spatial_ref + + In [4]: rds.rio.crs + Out[4]: CRS.from_epsg(32618) + + In [5]: rds4326 = rio.rio.reproject("epsg:4326") + + In [6]: rds4326.rio.crs + Out[6]: CRS.from_epsg(4326) + + In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + + .. _rasterio: https://rasterio.readthedocs.io/en/latest/ +.. _rioxarray: https://corteva.github.io/rioxarray/stable/ .. _test files: https://github.com/mapbox/rasterio/blob/master/tests/data/RGB.byte.tif -.. _pyproj: https://github.com/jswhit/pyproj +.. _pyproj: https://github.com/pyproj4/pyproj .. _io.zarr: diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 3188751366f..57b8da0c447 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -61,7 +61,9 @@ Extend xarray capabilities - `Collocate `_: Collocate xarray trajectories in arbitrary physical dimensions - `eofs `_: EOF analysis in Python. - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. +- `nxarray `_: NeXus input/output capability for xarray. 
- `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). +- `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. - `X-regression `_: Multiple linear regression from Statsmodels library coupled with Xarray library. diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 96641c2b97e..9e7c0f1d51d 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -105,6 +105,14 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.time.dt.dayofyear da.time.dt.dayofweek +- Rounding of datetimes to fixed frequencies via the ``dt`` accessor: + +.. ipython:: python + + da.time.dt.ceil('3D') + da.time.dt.floor('5D') + da.time.dt.round('2D') + - Group-by operations based on datetime accessor attributes (e.g. by month of the year): diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8768139cf9f..b90a4a466ee 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,11 +25,31 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added support for :py:class:`pandas.DatetimeIndex`-style rounding of + ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the + :py:class:`~core.accessor_dt.DatetimeAccessor`. + By `Spencer Clark `_ - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. -- implement pint support. (:issue:`3594`, :pull:`3706`) +- Support unit aware arrays with pint. (:issue:`3594`, :pull:`3706`, :pull:`3611`) By `Justus Magin `_. +- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a + `TypeError` on multiple string arguments. Receiving multiple string arguments + often means a user is attempting to pass multiple dimensions to group over + and should instead pass a list. + By `Maximilian Roos `_ +- The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced + in 0.14.1) is now on by default. To disable, use + ``xarray.set_options(display_style="text")``. + By `Julia Signell `_. +- :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a + first argument, which is then called on the input; replicating pandas' behavior. + By `Maximilian Roos `_. +- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, + :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` + (:issue:`3843`, :pull:`3844`) + By `Aaron Spring `_. Bug fixes @@ -39,6 +59,8 @@ Bug fixes :py:meth:`DataArray.groupby` when performing an operation that changes the size of the groups along the grouped dimension. By `Eric Jansen `_. +- Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). + By `Deepak Cherian `_. - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing index with name reflecting the previous dimension name instead of the new one (:issue:`3748`, :pull:`3752`). By `Joseph K Aicher @@ -52,9 +74,22 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. +- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)`` + to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword + argument ``keep_attrs`` to change this setting. (:issue:`3376`, + :pull:`3801`) By `Andrew Thomas `_. 
+ +- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` + simultaneously. (:issue:`3170`). By `Matthias Meyer `_. Documentation ~~~~~~~~~~~~~ +- Fix documentation of :py:class:`DataArray` removing the deprecated mention + that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) + By `Sander van Rijn `_. +- Update the installation instructions: only explicitly list recommended dependencies + (:issue:`3756`). + By `Mathias Hauser `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -67,6 +102,19 @@ Internal Changes - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. +- Preserved the ability to index with ``method="nearest"`` with a + :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1 + (:issue:`3751`). By `Spencer Clark `_. +- Greater flexibility and improved test coverage of subtracting various types + of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark + `_. +- Updated Azure CI MacOS image, given pending removal. + By `Maximilian Roos `_ +- Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). + By `Mathias Hauser `_. +- Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas + in favor of xarray's objects. + By `Maximilian Roos `_ .. _whats-new.0.15.0: diff --git a/readthedocs.yml b/readthedocs.yml index ad249bf8c09..173d61ec6f3 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,13 +1,9 @@ version: 2 build: - image: latest + image: stable conda: environment: ci/requirements/doc.yml -python: - version: 3.8 - install: [] - formats: [] diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 763769dac74..2469a31a3d9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -373,7 +373,7 @@ def store( if len(existing_variables) > 0: # there are variables to append # their encoding must be the same as in the store - ds = open_zarr(self.ds.store, chunks=None) + ds = open_zarr(self.ds.store, group=self.ds.path, chunks=None) variables_with_encoding = {} for vn in existing_variables: variables_with_encoding[vn] = variables[vn].copy(deep=False) @@ -487,7 +487,7 @@ def open_zarr( directory in file system where a Zarr DirectoryStore has been stored. synchronizer : object, optional Array synchronizer provided to zarr - group : str, obtional + group : str, optional Group path. (a.k.a. `path` in zarr terminology.) 
chunks : int or dict or tuple or {None, 'auto'}, optional Chunk sizes along each dimension, e.g., ``5`` or diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 8b440812ca9..1ea5d3a7d11 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -49,6 +49,7 @@ from xarray.core.utils import is_scalar +from ..core.common import _contains_cftime_datetimes from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name @@ -326,6 +327,32 @@ def _get_string_slice(self, key): raise KeyError(key) return loc + def _get_nearest_indexer(self, target, limit, tolerance): + """Adapted from pandas.Index._get_nearest_indexer""" + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + left_distances = abs(self.values[left_indexer] - target.values) + right_distances = abs(self.values[right_indexer] - target.values) + + if self.is_monotonic_increasing: + condition = (left_distances < right_distances) | (right_indexer == -1) + else: + condition = (left_distances <= right_distances) | (right_indexer == -1) + indexer = np.where(condition, left_indexer, right_indexer) + + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + def _filter_indexer_tolerance(self, target, indexer, tolerance): + """Adapted from pandas.Index._filter_indexer_tolerance""" + if isinstance(target, pd.Index): + distance = abs(self.values[indexer] - target.values) + else: + distance = abs(self.values[indexer] - target) + indexer = np.where(distance <= tolerance, indexer, -1) + return indexer + def get_loc(self, key, method=None, tolerance=None): """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" if isinstance(key, str): @@ -427,9 +454,11 @@ def __radd__(self, other): return CFTimeIndex(other + np.array(self)) def __sub__(self, other): - import cftime - - if isinstance(other, (CFTimeIndex, cftime.datetime)): + if _contains_datetime_timedeltas(other): + return CFTimeIndex(np.array(self) - other) + elif isinstance(other, pd.TimedeltaIndex): + return CFTimeIndex(np.array(self) - other.to_pytimedelta()) + elif _contains_cftime_datetimes(np.array(other)): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) except OverflowError: @@ -437,14 +466,17 @@ def __sub__(self, other): "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." ) - - elif isinstance(other, pd.TimedeltaIndex): - return CFTimeIndex(np.array(self) - other.to_pytimedelta()) else: - return CFTimeIndex(np.array(self) - other) + return NotImplemented def __rsub__(self, other): - return pd.TimedeltaIndex(other - np.array(self)) + try: + return pd.TimedeltaIndex(other - np.array(self)) + except OverflowError: + raise ValueError( + "The time difference exceeds the range of values " + "that can be expressed at the nanosecond resolution." + ) def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. 
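The reworked ``__sub__``/``__rsub__`` above dispatch on the operand type instead of special-casing cftime objects first. A minimal sketch of the resulting behavior (assuming cftime is installed)::

    import pandas as pd
    import xarray as xr

    index = xr.cftime_range("2000-01-01", periods=3, freq="D")

    # subtracting cftime dates (a CFTimeIndex or an array of cftime.datetime
    # objects) yields a pandas.TimedeltaIndex, or ValueError on overflow
    deltas = index - index

    # subtracting a pandas.TimedeltaIndex round-trips back to a CFTimeIndex
    shifted = index - pd.TimedeltaIndex(3 * [pd.Timedelta(days=1)])

    # any other operand now returns NotImplemented, so Python raises TypeError
    # index - 1  # TypeError: unsupported operand type(s)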
@@ -528,6 +560,83 @@ def strftime(self, date_format): """ return pd.Index([date.strftime(date_format) for date in self._data]) + @property + def asi8(self): + """Convert to integers with units of microseconds since 1970-01-01.""" + from ..core.resample_cftime import exact_cftime_datetime_difference + + epoch = self.date_type(1970, 1, 1) + return np.array( + [ + _total_microseconds(exact_cftime_datetime_difference(epoch, date)) + for date in self.values + ] + ) + + def _round_via_method(self, freq, method): + """Round dates using a specified method.""" + from .cftime_offsets import CFTIME_TICKS, to_offset + + offset = to_offset(freq) + if not isinstance(offset, CFTIME_TICKS): + raise ValueError(f"{offset} is a non-fixed frequency") + + unit = _total_microseconds(offset.as_timedelta()) + values = self.asi8 + rounded = method(values, unit) + return _cftimeindex_from_i8(rounded, self.date_type, self.name) + + def floor(self, freq): + """Round dates down to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _floor_int) + + def ceil(self, freq): + """Round dates up to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _ceil_int) + + def round(self, freq): + """Round dates to a fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _round_to_nearest_half_even) + def _parse_iso8601_without_reso(date_type, datetime_str): date, _ = _parse_iso8601_with_reso(date_type, datetime_str) @@ -554,3 +663,67 @@ def _parse_array_of_cftime_strings(strings, date_type): return np.array( [_parse_iso8601_without_reso(date_type, s) for s in strings.ravel()] ).reshape(strings.shape) + + +def _contains_datetime_timedeltas(array): + """Check if an input array contains datetime.timedelta objects.""" + array = np.atleast_1d(array) + return isinstance(array[0], timedelta) + + +def _cftimeindex_from_i8(values, date_type, name): + """Construct a CFTimeIndex from an array of integers. + + Parameters + ---------- + values : np.array + Integers representing microseconds since 1970-01-01. + date_type : cftime.datetime + Type of date for the index. + name : str + Name of the index. + + Returns + ------- + CFTimeIndex + """ + epoch = date_type(1970, 1, 1) + dates = np.array([epoch + timedelta(microseconds=int(value)) for value in values]) + return CFTimeIndex(dates, name=name) + + +def _total_microseconds(delta): + """Compute the total number of microseconds of a datetime.timedelta. + + Parameters + ---------- + delta : datetime.timedelta + Input timedelta. 
+ + Returns + ------- + int + """ + return delta / timedelta(microseconds=1) + + +def _floor_int(values, unit): + """Copied from pandas.""" + return values - np.remainder(values, unit) + + +def _ceil_int(values, unit): + """Copied from pandas.""" + return values + np.remainder(-values, unit) + + +def _round_to_nearest_half_even(values, unit): + """Copied from pandas.""" + if unit % 2: + return _ceil_int(values - unit // 2, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index c407371f9f0..de0e332b26c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -78,20 +78,27 @@ def _get_date_field(values, name, dtype): return access_method(values, name) -def _round_series(values, name, freq): - """Coerce an array of datetime-like values to a pandas Series and - apply requested rounding +def _round_through_series_or_index(values, name, freq): + """Coerce an array of datetime-like values to a pandas Series or xarray + CFTimeIndex and apply requested rounding """ - values_as_series = pd.Series(values.ravel()) - method = getattr(values_as_series.dt, name) + from ..coding.cftimeindex import CFTimeIndex + + if is_np_datetime_like(values.dtype): + values_as_series = pd.Series(values.ravel()) + method = getattr(values_as_series.dt, name) + else: + values_as_cftimeindex = CFTimeIndex(values.ravel()) + method = getattr(values_as_cftimeindex, name) + field_values = method(freq=freq).values return field_values.reshape(values.shape) def _round_field(values, name, freq): - """Indirectly access pandas rounding functions by wrapping data - as a Series and calling through `.dt` attribute. + """Indirectly access rounding functions by wrapping data + as a Series or CFTimeIndex Parameters ---------- @@ -110,9 +117,12 @@ def _round_field(values, name, freq): if isinstance(values, dask_array_type): from dask.array import map_blocks - return map_blocks(_round_series, values, name, freq=freq, dtype=np.datetime64) + dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") + return map_blocks( + _round_through_series_or_index, values, name, freq=freq, dtype=dtype + ) else: - return _round_series(values, name, freq) + return _round_through_series_or_index(values, name, freq) def _strftime_through_cftimeindex(values, date_format): diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 908119f7995..a83b1b87aa4 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -50,7 +50,7 @@ def _override_indexes(objects, all_indexes, exclude): objects = list(objects) for idx, obj in enumerate(objects[1:]): new_indexes = {} - for dim in obj.dims: + for dim in obj.indexes: if dim not in exclude: new_indexes[dim] = all_indexes[dim][0] objects[idx + 1] = obj._overwrite_indexes(new_indexes) diff --git a/xarray/core/common.py b/xarray/core/common.py index e908c69dd14..c80cb24c5b5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -660,6 +660,17 @@ def groupby(self, group, squeeze: bool = True, restore_coord_dims: bool = None): core.groupby.DataArrayGroupBy core.groupby.DatasetGroupBy """ + # While we don't generally check the type of every arg, passing + # multiple dimensions as multiple arguments is common enough, and the + # consequences hidden enough (strings evaluate as true) to warrant + # checking here. 
+ # A future version could make squeeze kwarg only, but would face + # backward-compat issues. + if not isinstance(squeeze, bool): + raise TypeError( + f"`squeeze` must be True or False, but {squeeze} was supplied" + ) + return self._groupby_cls( self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims ) @@ -742,6 +753,7 @@ def rolling( dim: Mapping[Hashable, int] = None, min_periods: int = None, center: bool = False, + keep_attrs: bool = None, **window_kwargs: int, ): """ @@ -758,6 +770,10 @@ def rolling( setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **window_kwargs : optional The keyword arguments form of ``dim``. One of dim or window_kwargs must be provided. @@ -799,8 +815,13 @@ def rolling( core.rolling.DataArrayRolling core.rolling.DatasetRolling """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + dim = either_dict_or_kwargs(dim, window_kwargs, "rolling") - return self._rolling_cls(self, dim, min_periods=min_periods, center=center) + return self._rolling_cls( + self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs + ) def rolling_exp( self, @@ -848,6 +869,7 @@ def coarsen( boundary: str = "exact", side: Union[str, Mapping[Hashable, str]] = "left", coord_func: str = "mean", + keep_attrs: bool = None, **window_kwargs: int, ): """ @@ -868,8 +890,12 @@ def coarsen( multiple of the window size. If 'trim', the excess entries are dropped. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' - coord_func : function (name) that is applied to the coordintes, + coord_func : function (name) that is applied to the coordinates, or a mapping from coordinate name to function (name). + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. 
Returns ------- @@ -904,9 +930,17 @@ def coarsen( core.rolling.DataArrayCoarsen core.rolling.DatasetCoarsen """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen") return self._coarsen_cls( - self, dim, boundary=boundary, side=side, coord_func=coord_func + self, + dim, + boundary=boundary, + side=side, + coord_func=coord_func, + keep_attrs=keep_attrs, ) def resample( @@ -1085,6 +1119,15 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): >>> import numpy as np >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=('x', 'y')) + >>> a + + array([[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24]]) + Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4) array([[ 0., 1., 2., 3., nan], @@ -1093,6 +1136,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan, nan], [ nan, nan, nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 5, -1) array([[ 0, 1, 2, 3, 4], @@ -1101,6 +1145,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [15, 16, -1, -1, -1], [20, -1, -1, -1, -1]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4, drop=True) array([[ 0., 1., 2., 3.], @@ -1109,6 +1154,14 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(lambda x: x.x + x.y < 4, drop=True) + + array([[ 0., 1., 2., 3.], + [ 5., 6., 7., nan], + [ 10., 11., nan, nan], + [ 15., nan, nan, nan]]) + Dimensions without coordinates: x, y + See also -------- numpy.where : corresponding numpy function @@ -1118,6 +1171,9 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): from .dataarray import DataArray from .dataset import Dataset + if callable(cond): + cond = cond(self) + if drop: if other is not dtypes.NA: raise ValueError("cannot set `other` if drop=True") diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 062cc6342df..6782070da0b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -304,8 +304,7 @@ def __init__( Name(s) of the data dimension(s). Must be either a hashable (only for 1D data) or a sequence of hashables with length equal to the number of dimensions. If this argument is omitted, dimension names - are taken from ``coords`` (if possible) and otherwise default to - ``['dim_0', ... 'dim_n']``. + default to ``['dim_0', ... 'dim_n']``. name : str or None, optional Name of this array. attrs : dict_like or None, optional @@ -1476,20 +1475,23 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Examples -------- + >>> arr = xr.DataArray(data=[0, 1], dims="x", - coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) >>> arr array([0, 1]) Coordinates: * x (x) >> arr.swap_dims({"x": "y"}) array([0, 1]) Coordinates: x (y) >> arr.swap_dims({"x": "z"}) array([0, 1]) @@ -1719,7 +1721,7 @@ def stack( Examples -------- - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> arr @@ -1769,7 +1771,7 @@ def unstack( Examples -------- - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... 
coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> arr @@ -1818,7 +1820,7 @@ def to_unstacked_dataset(self, dim, level=0): Examples -------- >>> import xarray as xr - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> data = xr.Dataset({'a': arr, 'b': arr.isel(y=0)}) >>> data @@ -2244,20 +2246,14 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: * 0D -> `xarray.DataArray` * 1D -> `pandas.Series` * 2D -> `pandas.DataFrame` - * 3D -> `pandas.Panel` *(deprecated)* - Only works for arrays with 3 or fewer dimensions. + Only works for arrays with 2 or fewer dimensions. The DataArray constructor performs the inverse transformation. """ # TODO: consolidate the info about pandas constructors and the # attributes that correspond to their indexes into a separate module? - constructors = { - 0: lambda x: x, - 1: pd.Series, - 2: pd.DataFrame, - 3: pdcompat.Panel, - } + constructors = {0: lambda x: x, 1: pd.Series, 2: pd.DataFrame} try: constructor = constructors[self.ndim] except KeyError: @@ -2630,7 +2626,7 @@ def plot(self) -> _PlotMethods: """ Access plotting functions for DataArray's - >>> d = DataArray([[1, 2], [3, 4]]) + >>> d = xr.DataArray([[1, 2], [3, 4]]) For convenience just call this directly @@ -2693,6 +2689,12 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr difference : same type as caller The n-th order finite difference of this object. + .. note:: + + `n` matches numpy's behavior and is different from pandas' first + argument named `periods`. + + Examples -------- >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ['x']) @@ -2850,18 +2852,20 @@ def dot( -------- >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) - >>> da = DataArray(da_vals, dims=['x', 'y', 'z']) + >>> da = xr.DataArray(da_vals, dims=['x', 'y', 'z']) >>> dm_vals = np.arange(4) - >>> dm = DataArray(dm_vals, dims=['z']) + >>> dm = xr.DataArray(dm_vals, dims=['z']) >>> dm.dims ('z') + >>> da.dims ('x', 'y', 'z') >>> dot_result = da.dot(dm) >>> dot_result.dims ('x', 'y') + """ if isinstance(other, Dataset): raise NotImplementedError( @@ -2934,6 +2938,7 @@ def quantile( dim: Union[Hashable, Sequence[Hashable], None] = None, interpolation: str = "linear", keep_attrs: bool = None, + skipna: bool = True, ) -> "DataArray": """Compute the qth quantile of the data along the specified dimension. @@ -2961,6 +2966,8 @@ def quantile( If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + Whether to skip missing values when aggregating. 
Returns ------- @@ -2973,7 +2980,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile Examples -------- @@ -3010,7 +3017,11 @@ def quantile( """ ds = self._to_temp_dataset().quantile( - q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation + q, + dim=dim, + keep_attrs=keep_attrs, + interpolation=interpolation, + skipna=skipna, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7252dd2f3df..a4d20a79b7c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1011,7 +1011,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": >>> da = xr.DataArray(np.random.randn(2, 3)) >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}, - coords={'x': ['one', 'two']}) + ... coords={'x': ['one', 'two']}) >>> ds.copy() Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1021,6 +1021,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) float64 -0.8079 0.3897 -1.862 -0.6091 -1.051 -0.3003 bar (x) int64 -1 2 + >>> ds_0 = ds.copy(deep=False) >>> ds_0['foo'][0, 0] = 7 >>> ds_0 @@ -1032,6 +1033,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) float64 7.0 0.3897 -1.862 -0.6091 -1.051 -0.3003 bar (x) int64 -1 2 + >>> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1055,6 +1057,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) int64 0 1 2 3 4 5 bar (x) >> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -2883,7 +2886,7 @@ def swap_dims( Examples -------- >>> ds = xr.Dataset(data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, - coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) >>> ds Dimensions: (x: 2) @@ -2893,6 +2896,7 @@ def swap_dims( Data variables: a (x) int64 5 7 b (x) float64 0.1 2.4 + >>> ds.swap_dims({"x": "y"}) Dimensions: (y: 2) @@ -2902,6 +2906,7 @@ def swap_dims( Data variables: a (y) int64 5 7 b (y) float64 0.1 2.4 + >>> ds.swap_dims({"x": "z"}) Dimensions: (z: 2) @@ -3341,7 +3346,7 @@ def to_stacked_array( Examples -------- - >>> data = Dataset( + >>> data = xr.Dataset( ... data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), ... 'b': ('x', [6, 7])}, ... coords={'y': ['u', 'v', 'w']} @@ -4879,6 +4884,11 @@ def diff(self, dim, n=1, label="upper"): difference : same type as caller The n-th order finite difference of this object. + .. note:: + + `n` matches numpy's behavior and is different from pandas' first + argument named `periods`. + Examples -------- >>> ds = xr.Dataset({'foo': ('x', [5, 5, 6, 6])}) @@ -5135,7 +5145,13 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile( - self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None + self, + q, + dim=None, + interpolation="linear", + numeric_only=False, + keep_attrs=None, + skipna=True, ): """Compute the qth quantile of the data along the specified dimension. @@ -5166,6 +5182,8 @@ def quantile( object will be returned without attributes. numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. + skipna : bool, optional + Whether to skip missing values when aggregating. 
Returns ------- @@ -5178,7 +5196,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, DataArray.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, DataArray.quantile Examples -------- @@ -5253,6 +5271,7 @@ def quantile( dim=reduce_dims, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) else: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 297ef8fbeec..ab51eaeebdd 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,7 +558,9 @@ def fillna(self, value): out = ops.fillna(self, value) return out - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -582,6 +584,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -595,7 +599,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile Examples @@ -656,6 +660,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): dim=dim, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) return out diff --git a/xarray/core/options.py b/xarray/core/options.py index 72f9ad8e1fa..15d05159d6d 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -20,7 +20,7 @@ CMAP_SEQUENTIAL: "viridis", CMAP_DIVERGENT: "RdBu_r", KEEP_ATTRS: "default", - DISPLAY_STYLE: "text", + DISPLAY_STYLE: "html", } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index ea6d72b2e03..5f633abbde6 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -7,6 +7,7 @@ from . import dtypes, duck_array_ops, utils from .dask_array_ops import dask_rolling_wrapper from .ops import inject_reduce_methods +from .options import _get_keep_attrs from .pycompat import dask_array_type try: @@ -42,10 +43,10 @@ class Rolling: DataArray.rolling """ - __slots__ = ("obj", "window", "min_periods", "center", "dim") - _attributes = ("window", "min_periods", "center", "dim") + __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs") + _attributes = ("window", "min_periods", "center", "dim", "keep_attrs") - def __init__(self, obj, windows, min_periods=None, center=False): + def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ Moving window object. @@ -65,6 +66,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. 
Returns ------- @@ -89,6 +94,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): self.center = center self.dim = dim + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + self.keep_attrs = keep_attrs + @property def _min_periods(self): return self.min_periods if self.min_periods is not None else self.window @@ -143,7 +152,7 @@ def count(self): class DataArrayRolling(Rolling): __slots__ = ("window_labels",) - def __init__(self, obj, windows, min_periods=None, center=False): + def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ Moving window object for DataArray. You should use DataArray.rolling() method to construct this object @@ -165,6 +174,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. Returns ------- @@ -177,7 +190,11 @@ def __init__(self, obj, windows, min_periods=None, center=False): Dataset.rolling Dataset.groupby """ - super().__init__(obj, windows, min_periods=min_periods, center=center) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + super().__init__( + obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs + ) self.window_labels = self.obj[self.dim] @@ -214,21 +231,22 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): Examples -------- - >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) - >>> + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) + >>> rolling = da.rolling(b=3) >>> rolling.construct('window_dim') array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim - >>> + >>> rolling = da.rolling(b=3, center=True) >>> rolling.construct('window_dim') array([[[np.nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, np.nan]], [[np.nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, np.nan]]]) Dimensions without coordinates: a, b, window_dim + """ from .dataarray import DataArray @@ -261,26 +279,26 @@ def reduce(self, func, **kwargs): Examples -------- - >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) - >>> + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) >>> rolling = da.rolling(b=3) >>> rolling.construct('window_dim') array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim - >>> + >>> rolling.reduce(np.sum) array([[nan, nan, 3., 6.], [nan, nan, 15., 18.]]) Dimensions without coordinates: a, b - >>> + >>> rolling = da.rolling(b=3, min_periods=1) >>> rolling.reduce(np.nansum) array([[ 0., 1., 3., 6.], [ 4., 9., 15., 18.]]) + """ rolling_dim = utils.get_temp_dimname(self.obj.dims, "_rolling_dim") windows = self.construct(rolling_dim) @@ -374,7 +392,7 @@ def _numpy_or_bottleneck_reduce( class DatasetRolling(Rolling): __slots__ = ("rollings",) - def __init__(self, obj, windows, min_periods=None, center=False): + def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None): """ Moving window object for Dataset. 
You should use Dataset.rolling() method to construct this object @@ -396,6 +414,10 @@ def __init__(self, obj, windows, min_periods=None, center=False): setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. + keep_attrs : bool, optional + If True, the object's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. Returns ------- @@ -408,7 +430,7 @@ def __init__(self, obj, windows, min_periods=None, center=False): Dataset.groupby DataArray.groupby """ - super().__init__(obj, windows, min_periods, center) + super().__init__(obj, windows, min_periods, center, keep_attrs) if self.dim not in self.obj.dims: raise KeyError(self.dim) # Keep each Rolling object as a dictionary @@ -416,7 +438,9 @@ def __init__(self, obj, windows, min_periods=None, center=False): for key, da in self.obj.data_vars.items(): # keeps rollings only for the dataset depending on slf.dim if self.dim in da.dims: - self.rollings[key] = DataArrayRolling(da, windows, min_periods, center) + self.rollings[key] = DataArrayRolling( + da, windows, min_periods, center, keep_attrs + ) def _dataset_implementation(self, func, **kwargs): from .dataset import Dataset @@ -427,7 +451,8 @@ def _dataset_implementation(self, func, **kwargs): reduced[key] = func(self.rollings[key], **kwargs) else: reduced[key] = self.obj[key] - return Dataset(reduced, coords=self.obj.coords) + attrs = self.obj.attrs if self.keep_attrs else {} + return Dataset(reduced, coords=self.obj.coords, attrs=attrs) def reduce(self, func, **kwargs): """Reduce the items in this group by applying `func` along some @@ -466,7 +491,7 @@ def _numpy_or_bottleneck_reduce( **kwargs, ) - def construct(self, window_dim, stride=1, fill_value=dtypes.NA): + def construct(self, window_dim, stride=1, fill_value=dtypes.NA, keep_attrs=None): """ Convert this rolling object to xr.Dataset, where the window dimension is stacked as a new dimension @@ -487,6 +512,9 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): from .dataset import Dataset + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + dataset = {} for key, da in self.obj.data_vars.items(): if self.dim in da.dims: @@ -509,10 +537,18 @@ class Coarsen: DataArray.coarsen """ - __slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess") + __slots__ = ( + "obj", + "boundary", + "coord_func", + "windows", + "side", + "trim_excess", + "keep_attrs", + ) _attributes = ("windows", "side", "trim_excess") - def __init__(self, obj, windows, boundary, side, coord_func): + def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): """ Moving window object. 
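With ``keep_attrs`` now stored on ``Coarsen``, attribute retention can be requested per call. A minimal sketch (the dataset and its ``title`` attribute are illustrative)::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"t": ("time", np.arange(8.0))}, attrs={"title": "sample"})

    # keep_attrs=True copies ds.attrs onto the result; the default (False)
    # drops them unless xr.set_options(keep_attrs=True) is in effect
    coarse = ds.coarsen(time=2, keep_attrs=True).mean()
    assert coarse.attrs == {"title": "sample"}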
@@ -541,6 +577,7 @@ def __init__(self, obj, windows, boundary, side, coord_func): self.windows = windows self.side = side self.boundary = boundary + self.keep_attrs = keep_attrs absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] if absent_dims: @@ -626,6 +663,11 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, **kwargs): from .dataset import Dataset + if self.keep_attrs: + attrs = self.obj.attrs + else: + attrs = {} + reduced = {} for key, da in self.obj.data_vars.items(): reduced[key] = da.variable.coarsen( @@ -644,7 +686,7 @@ def wrapped_func(self, **kwargs): ) else: coords[c] = v.variable - return Dataset(reduced, coords=coords) + return Dataset(reduced, coords=coords, attrs=attrs) return wrapped_func diff --git a/xarray/core/variable.py b/xarray/core/variable.py index daa8678157b..435edb6f014 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1678,7 +1678,9 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """ return self.broadcast_equals(other, equiv=equiv) - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1725,6 +1727,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): from .computation import apply_ufunc + _quantile_func = np.nanquantile if skipna else np.quantile + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -1739,7 +1743,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): def _wrapper(npa, **kwargs): # move quantile axis to end. required for apply_ufunc - return np.moveaxis(np.nanquantile(npa, **kwargs), 0, -1) + return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) result = apply_ufunc( @@ -1949,6 +1953,9 @@ def _coarsen_reshape(self, windows, boundary, side): else: shape.append(variable.shape[i]) + keep_attrs = _get_keep_attrs(default=False) + variable.attrs = variable._attrs if keep_attrs else {} + return variable.data.reshape(shape), tuple(axes) @property diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index f178720a6e1..1a8a2732eeb 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -7,6 +7,7 @@ from . 
import ( assert_array_equal, assert_equal, + assert_identical, raises_regex, requires_cftime, requires_dask, @@ -435,3 +436,106 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) + + +@pytest.fixture +def cftime_rounding_dataarray(cftime_date_type): + return xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 1), cftime_date_type(1, 1, 1, 15)], + [cftime_date_type(1, 1, 1, 23), cftime_date_type(1, 1, 2, 1)], + ] + ) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_floor_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 1, 0)], + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="floor", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.floor(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.floor(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_ceil_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 3, 0)], + ], + name="ceil", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.ceil(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.ceil(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_round_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="round", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). 
+ with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.round(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.round(freq) + + assert_identical(result, expected) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b7ba70ef6c4..59ed8e690cc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1729,39 +1729,52 @@ def test_hidden_zarr_keys(self): pass @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") - def test_write_persistence_modes(self): + @pytest.mark.parametrize("group", [None, "group1"]) + def test_write_persistence_modes(self, group): original = create_test_data() # overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # don't overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w-"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w-", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # make sure overwriting works as expected with self.create_zarr_target() as store: self.save(original, store) # should overwrite with no error - self.save(original, store, mode="w") - with self.open(store) as actual: + self.save(original, store, mode="w", group=group) + with self.open(store, group=group) as actual: assert_identical(original, actual) with pytest.raises(ValueError): self.save(original, store, mode="w-") # check append mode for normal write - with self.roundtrip(original, save_kwargs={"mode": "a"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "a", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) - ds, ds_to_append, _ = create_append_test_data() - # check append mode for append write + ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") - ds_to_append.to_zarr(store_target, append_dim="time") + ds.to_zarr(store_target, mode="w", group=group) + ds_to_append.to_zarr(store_target, append_dim="time", group=group) original = xr.concat([ds, ds_to_append], dim="time") - assert_identical(original, xr.open_zarr(store_target)) + actual = xr.open_zarr(store_target, group=group) + assert_identical(original, actual) def test_compressor_encoding(self): original = create_test_data() @@ -1966,24 +1979,8 @@ def create_zarr_target(self): yield tmp -class ScipyWriteBase(CFEncodedBase, NetCDF3Only): - def test_append_write(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_write() - - def test_append_overwrite_values(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_overwrite_values() - - @requires_scipy -class TestScipyInMemoryData(ScipyWriteBase): +class TestScipyInMemoryData(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -2004,7 +2001,7 @@ def test_bytes_pickle(self): @requires_scipy -class TestScipyFileObject(ScipyWriteBase): +class TestScipyFileObject(CFEncodedBase, NetCDF3Only): engine = "scipy" 
@contextlib.contextmanager @@ -2037,7 +2034,7 @@ def test_pickle_dataarray(self): @requires_scipy -class TestScipyFilePath(ScipyWriteBase): +class TestScipyFilePath(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -3304,7 +3301,7 @@ def test_session(self): @requires_scipy @requires_pynio -class TestPyNio(ScipyWriteBase): +class TestPyNio(CFEncodedBase, NetCDF3Only): def test_write_store(self): # pynio is read-only for now pass diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8025766529e..43d6d7b068e 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -451,10 +451,21 @@ def test_sel_date_scalar(da, date_type, index): @pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") +@requires_cftime +def test_sel_date_distant_date(da, date_type, index): + expected = xr.DataArray(4).assign_coords(time=index[3]) + result = da.sel(time=date_type(2000, 1, 1), method="nearest") + assert_identical(result, expected) + + @requires_cftime @pytest.mark.parametrize( "sel_kwargs", - [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}], + [ + {"method": "nearest"}, + {"method": "nearest", "tolerance": timedelta(days=70)}, + {"method": "nearest", "tolerance": timedelta(days=1800000)}, + ], ) def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): expected = xr.DataArray(2).assign_coords(time=index[1]) @@ -738,7 +749,7 @@ def test_timedeltaindex_add_cftimeindex(calendar): @requires_cftime -def test_cftimeindex_sub(index): +def test_cftimeindex_sub_timedelta(index): date_type = index.date_type expected_dates = [ date_type(1, 1, 2), @@ -753,6 +764,27 @@ def test_cftimeindex_sub(index): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize( + "other", + [np.array(4 * [timedelta(days=1)]), np.array(timedelta(days=1))], + ids=["1d-array", "scalar-array"], +) +def test_cftimeindex_sub_timedelta_array(index, other): + date_type = index.date_type + expected_dates = [ + date_type(1, 1, 2), + date_type(1, 2, 2), + date_type(2, 1, 2), + date_type(2, 2, 2), + ] + expected = CFTimeIndex(expected_dates) + result = index + timedelta(days=2) + result = result - other + assert result.equals(expected) + assert isinstance(result, CFTimeIndex) + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_cftimeindex(calendar): @@ -784,6 +816,14 @@ def test_cftime_datetime_sub_cftimeindex(calendar): assert isinstance(result, pd.TimedeltaIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_distant_cftime_datetime_sub_cftimeindex(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(ValueError, match="difference exceeds"): + a.date_type(1, 1, 1) - a + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_timedeltaindex(calendar): @@ -795,6 +835,25 @@ def test_cftimeindex_sub_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_index_of_cftime_datetimes(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + b = pd.Index(a.values) + expected = a - a + result = a - b + assert result.equals(expected) + assert isinstance(result, pd.TimedeltaIndex) + + +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def 
test_cftimeindex_sub_not_implemented(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(TypeError, match="unsupported operand"): + a - 1 + + @requires_cftime def test_cftimeindex_rsub(index): with pytest.raises(TypeError): @@ -904,3 +963,92 @@ def test_multiindex(): index = xr.cftime_range("2001-01-01", periods=100, calendar="360_day") mindex = pd.MultiIndex.from_arrays([index]) assert mindex.get_loc("2001-01") == slice(0, 30) + + +@requires_cftime +@pytest.mark.parametrize("freq", ["3663S", "33T", "2H"]) +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_against_datetimeindex(freq, method): + expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777S") + expected = getattr(expected, method)(freq) + result = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + result = getattr(result, method)(freq).to_datetimeindex() + assert result.equals(expected) + + +@requires_cftime +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_invalid_freq(method): + index = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + with pytest.raises(ValueError, match="fixed"): + getattr(index, method)("MS") + + +@pytest.fixture +def rounding_index(date_type): + return xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 999512), + date_type(1, 1, 1, 3, 0, 1, 500001), + date_type(1, 1, 1, 7, 0, 6, 499999), + ] + ) + + +@requires_cftime +def test_ceil(rounding_index, date_type): + result = rounding_index.ceil("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 7, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_floor(rounding_index, date_type): + result = rounding_index.floor("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 0), + date_type(1, 1, 1, 3, 0, 1, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_round(rounding_index, date_type): + result = rounding_index.round("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_asi8(date_type): + index = xr.CFTimeIndex([date_type(1970, 1, 1), date_type(1970, 1, 2)]) + result = index.asi8 + expected = 1000000 * 86400 * np.array([0, 1]) + np.testing.assert_array_equal(result, expected) + + +@requires_cftime +def test_asi8_distant_date(): + """Test that asi8 conversion is truly exact.""" + import cftime + + date_type = cftime.DatetimeProlepticGregorian + index = xr.CFTimeIndex([date_type(10731, 4, 22, 3, 25, 45, 123456)]) + result = index.asi8 + expected = np.array([1000000 * 86400 * 400 * 8000 + 12345 * 1000000 + 123456]) + np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index bd99181a947..77c030198ac 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -250,6 +250,13 @@ def test_concat_join_kwarg(self): actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected[join]) + # regression test for #3681 + actual = concat([ds1.drop("x"), ds2.drop("x")], join="override", dim="y") + expected = Dataset( + {"a": (("x", "y"), np.array([0, 0], ndmin=2))}, coords={"y": [0, 0.0001]} + ) + assert_identical(actual, expected) + def test_concat_promote_shape(self): # mixed dims 
within variables objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 0a622d279ba..dfaf8fd4e28 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2215,6 +2215,12 @@ def test_where(self): actual = arr.where(arr.x < 2, drop=True) assert_identical(actual, expected) + def test_where_lambda(self): + arr = DataArray(np.arange(4), dims="y") + expected = arr.sel(y=slice(2)) + actual = arr.where(lambda x: x.y < 2, drop=True) + assert_identical(actual, expected) + def test_where_string(self): array = DataArray(["a", "b"]) expected = DataArray(np.array(["a", np.nan], dtype=object)) @@ -2362,13 +2368,15 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + def test_quantile(self, q, axis, dim, skipna): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims @@ -3403,14 +3411,10 @@ def test_to_pandas(self): assert_array_equal(actual.columns, [0, 1]) # roundtrips - for shape in [(3,), (3, 4), (3, 4, 5)]: - if len(shape) > 2 and LooseVersion(pd.__version__) >= "0.25.0": - continue + for shape in [(3,), (3, 4)]: dims = list("abc")[: len(shape)] da = DataArray(np.random.randn(*shape), dims=dims) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - roundtripped = DataArray(da.to_pandas()).drop_vars(dims) + roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) with raises_regex(ValueError, "cannot convert"): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5e254c37e44..d2e8c6b7609 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4349,6 +4349,12 @@ def test_where(self): assert actual.a.name == "a" assert actual.a.attrs == ds.a.attrs + # lambda + ds = Dataset({"a": ("x", range(5))}) + expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) + actual = ds.where(lambda x: x > 1) + assert_identical(expected, actual) + def test_where_other(self): ds = Dataset({"a": ("x", range(5))}, {"x": range(5)}) expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)}) @@ -4356,6 +4362,9 @@ def test_where_other(self): assert_equal(expected, actual) assert actual.a.dtype == int + actual = ds.where(lambda x: x > 1, -1) + assert_equal(expected, actual) + with raises_regex(ValueError, "cannot set"): ds.where(ds > 1, other=0, drop=True) @@ -4688,12 +4697,13 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q): + def test_quantile(self, q, skipna): ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, 
skipna=skipna) if is_scalar(q): assert "quantile" not in ds_quantile.dims else: @@ -4701,12 +4711,27 @@ def test_quantile(self, q): for var, dar in ds.data_vars.items(): assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + assert_identical( + ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna) + ) dim = ["dim1", "dim2"] - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) + @pytest.mark.parametrize("skipna", [True, False]) + def test_quantile_skipna(self, skipna): + q = 0.1 + dim = "time" + ds = Dataset({"a": ([dim], np.arange(0, 11))}) + ds = ds.where(ds >= 1) + + result = ds.quantile(q=q, dim=dim, skipna=skipna) + + value = 1.9 if skipna else np.nan + expected = Dataset({"a": value}, coords={"quantile": q}) + assert_identical(result, expected) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) @@ -5664,6 +5689,62 @@ def test_coarsen_coords_cftime(): np.testing.assert_array_equal(actual.time, expected_times) +def test_coarsen_keep_attrs(): + _attrs = {"units": "test", "long_name": "testing"} + + var1 = np.linspace(10, 15, 100) + var2 = np.linspace(5, 10, 100) + coords = np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={"var1": ("coord", var1), "var2": ("coord", var2)}, + coords={"coord": coords}, + attrs=_attrs, + ) + + # Test dropped attrs + dat = ds.coarsen(coord=5).mean() + assert dat.attrs == {} + + # Test kept attrs using dataset keyword + dat = ds.coarsen(coord=5, keep_attrs=True).mean() + assert dat.attrs == _attrs + + # Test kept attrs using global option + with set_options(keep_attrs=True): + dat = ds.coarsen(coord=5).mean() + assert dat.attrs == _attrs + + +def test_rolling_keep_attrs(): + _attrs = {"units": "test", "long_name": "testing"} + + var1 = np.linspace(10, 15, 100) + var2 = np.linspace(5, 10, 100) + coords = np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={"var1": ("coord", var1), "var2": ("coord", var2)}, + coords={"coord": coords}, + attrs=_attrs, + ) + + # Test dropped attrs + dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean() + assert dat.attrs == {} + + # Test kept attrs using dataset keyword + dat = ds.rolling( + dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True + ).mean() + assert dat.attrs == _attrs + + # Test kept attrs using global option + with set_options(keep_attrs=True): + dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean() + assert dat.attrs == _attrs + + def test_rolling_properties(ds): # catching invalid args with pytest.raises(ValueError, match="exactly one dim/window should"): diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index fea24ff93f8..01357000b20 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -130,5 +130,5 @@ def test_repr_of_dataset(dataset): assert ( formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3 ) - assert "<U4" in formatted + assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 28bdf47838b..3845eb921a1 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -516,6 +516,11 @@ def test_groupby_reduce_dimension_error(array): assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"])) +def 
test_groupby_multiple_string_args(array):
+    with pytest.raises(TypeError):
+        array.groupby("x", "y")
+
+
 def test_groupby_bins_timeseries():
     ds = xr.Dataset()
     ds["time"] = xr.DataArray(
diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py
index f155acbf494..19f74476ced 100644
--- a/xarray/tests/test_options.py
+++ b/xarray/tests/test_options.py
@@ -68,12 +68,12 @@ def test_nested_options():
 def test_display_style():
-    original = "text"
+    original = "html"
     assert OPTIONS["display_style"] == original
     with pytest.raises(ValueError):
         xarray.set_options(display_style="invalid_str")
-    with xarray.set_options(display_style="html"):
-        assert OPTIONS["display_style"] == "html"
+    with xarray.set_options(display_style="text"):
+        assert OPTIONS["display_style"] == "text"
     assert OPTIONS["display_style"] == original


@@ -177,10 +177,11 @@ def test_merge_attr_retention(self):
     def test_display_style_text(self):
         ds = create_test_dataset_attrs()
-        text = ds._repr_html_()
-        assert text.startswith("<pre>")
-        assert "'nested'" in text
-        assert "<xarray.Dataset>" in text
+        with xarray.set_options(display_style="text"):
+            text = ds._repr_html_()
+            assert text.startswith("
")
+            assert "'nested'" in text
+            assert "<xarray.Dataset>" in text
 
     def test_display_style_html(self):
         ds = create_test_dataset_attrs()
@@ -191,9 +192,10 @@ def test_display_style_html(self):
 
     def test_display_dataarray_style_text(self):
         da = create_test_dataarray_attrs()
-        text = da._repr_html_()
-        assert text.startswith("
")
-        assert "<xarray.DataArray 'var1'" in text
+        with xarray.set_options(display_style="text"):
+            text = da._repr_html_()
+            assert text.startswith("
")
+            assert "<xarray.DataArray 'var1'" in text
 
     def test_display_dataarray_style_html(self):
         da = create_test_dataarray_attrs()
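
Reviewer note (not part of the patch): a minimal sketch of the `display_style` behaviour these tests now pin down, assuming the new `"html"` default.

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(3))})

# The "text" style wraps the plain repr in a <pre> block, which is what
# test_display_style_text asserts on.
with xr.set_options(display_style="text"):
    assert ds._repr_html_().startswith("<pre>")

# The default "html" style emits the rich widget markup instead.
with xr.set_options(display_style="html"):
    assert "<div" in ds._repr_html_()
```
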
diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
index 9f63ebb1d42..bef3af62d74 100644
--- a/xarray/tests/test_units.py
+++ b/xarray/tests/test_units.py
@@ -1,3 +1,4 @@
+import functools
 import operator
 from distutils.version import LooseVersion
 
@@ -8,6 +9,7 @@
 import xarray as xr
 from xarray.core import formatting
 from xarray.core.npcompat import IS_NEP18_ACTIVE
+from xarray.testing import assert_allclose, assert_identical
 
 from .test_variable import VariableSubclassobjects
 
@@ -70,53 +72,17 @@ def array_strip_units(array):
         return array
 
 
-def array_attach_units(data, unit, convert_from=None):
-    try:
-        unit, convert_from = unit
-    except TypeError:
-        pass
-
+def array_attach_units(data, unit):
     if isinstance(data, Quantity):
-        if not convert_from:
-            raise ValueError(
-                "cannot attach unit {unit} to quantity ({data.units})".format(
-                    unit=unit, data=data
-                )
-            )
-        elif isinstance(convert_from, unit_registry.Unit):
-            data = data.magnitude
-        elif convert_from is True:  # intentionally accept exactly true
-            if data.check(unit):
-                convert_from = data.units
-                data = data.magnitude
-            else:
-                raise ValueError(
-                    "cannot convert quantity ({data.units}) to {unit}".format(
-                        unit=unit, data=data
-                    )
-                )
-        else:
-            raise ValueError(
-                "cannot convert from invalid unit {convert_from}".format(
-                    convert_from=convert_from
-                )
-            )
+        raise ValueError(f"cannot attach unit {unit} to quantity {data}")
 
-    # to make sure we also encounter the case of "equal if converted"
-    if convert_from is not None:
-        quantity = (data * convert_from).to(
-            unit
-            if isinstance(unit, unit_registry.Unit)
-            else unit_registry.dimensionless
-        )
-    else:
-        try:
-            quantity = data * unit
-        except np.core._exceptions.UFuncTypeError:
-            if unit != 1:
-                raise
+    try:
+        quantity = data * unit
+    except np.core._exceptions.UFuncTypeError:
+        if isinstance(unit, unit_registry.Unit):
+            raise
 
-            quantity = data
+        quantity = data
 
     return quantity
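
Aside (illustrative only, assumes pint is installed): the slimmed-down helper relies on the fact that multiplying a bare numpy array by a unit wraps it in a Quantity, while multiplying an existing Quantity would combine units rather than replace them, hence the ValueError guard above.

```python
import numpy as np
import pint

unit_registry = pint.UnitRegistry()

data = np.linspace(0, 1, 5)
quantity = data * unit_registry.m  # ndarray * Unit -> pint.Quantity
assert quantity.units == unit_registry.m
```
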
 
@@ -241,6 +207,11 @@ def attach_units(obj, units):
 
 
 def convert_units(obj, to):
+    # preprocess
+    to = {
+        key: None if not isinstance(value, unit_registry.Unit) else value
+        for key, value in to.items()
+    }
     if isinstance(obj, xr.Dataset):
         data_vars = {
             name: convert_units(array.variable, {None: to.get(name)})
@@ -282,6 +253,7 @@ def convert_units(obj, to):
 
 
 def assert_units_equal(a, b):
+    __tracebackhide__ = True
     assert extract_units(a) == extract_units(b)
 
 
@@ -414,9 +386,8 @@ def __repr__(self):
         return f"function_{self.name}"
 
 
-@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way")
 def test_apply_ufunc_dataarray(dtype):
-    func = function(
+    func = functools.partial(
         xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1}
     )
 
@@ -427,12 +398,12 @@ def test_apply_ufunc_dataarray(dtype):
     expected = attach_units(func(strip_units(data_array)), extract_units(data_array))
     actual = func(data_array)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
-@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way")
 def test_apply_ufunc_dataset(dtype):
-    func = function(
+    func = functools.partial(
         xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1}
     )
 
@@ -450,10 +421,10 @@ def test_apply_ufunc_dataset(dtype):
     expected = attach_units(func(strip_units(ds)), extract_units(ds))
     actual = func(ds)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
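
For reference, a hedged sketch of the call pattern the un-xfailed tests now use: binding `apply_ufunc`'s arguments with `functools.partial` instead of the local `function` helper.

```python
import functools

import numpy as np
import xarray as xr

mean_over_x = functools.partial(
    xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1}
)

da = xr.DataArray(np.arange(6.0).reshape(2, 3), dims=("y", "x"))
reduced = mean_over_x(da)  # "x" is consumed as the core dimension
assert reduced.dims == ("y",)
```
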
 
 
-@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -475,36 +446,40 @@ def test_apply_ufunc_dataset(dtype):
         "coords",
     ),
 )
-@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan)))
+@pytest.mark.parametrize("fill_value", (10, np.nan))
 def test_align_dataarray(fill_value, variant, unit, error, dtype):
     original_unit = unit_registry.m
 
     variants = {
-        "data": (unit, 1, 1),
-        "dims": (original_unit, unit, 1),
-        "coords": (original_unit, 1, unit),
+        "data": (unit, original_unit, original_unit),
+        "dims": (original_unit, unit, original_unit),
+        "coords": (original_unit, original_unit, unit),
     }
     data_unit, dim_unit, coord_unit = variants.get(variant)
 
     array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit
     array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit
     x = np.arange(2) * original_unit
-    x_a1 = np.array([10, 5]) * original_unit
-    x_a2 = np.array([10, 5]) * coord_unit
 
     y1 = np.arange(5) * original_unit
     y2 = np.arange(2, 7) * dim_unit
+    y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit
+    y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit
 
-    data_array1 = xr.DataArray(
-        data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y")
-    )
-    data_array2 = xr.DataArray(
-        data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y")
-    )
+    coords1 = {"x": x, "y": y1}
+    coords2 = {"x": x, "y": y2}
+    if variant == "coords":
+        coords1["y_a"] = ("y", y_a1)
+        coords2["y_a"] = ("y", y_a2)
+
+    data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y"))
+    data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y"))
 
     fill_value = fill_value * data_unit
     func = function(xr.align, join="outer", fill_value=fill_value)
-    if error is not None:
+    if error is not None and not (
+        np.isnan(fill_value) and not isinstance(fill_value, Quantity)
+    ):
         with pytest.raises(error):
             func(data_array1, data_array2)
 
@@ -524,15 +499,19 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
         **stripped_kwargs,
     )
     expected_a = attach_units(expected_a, units_a)
-    expected_b = convert_units(attach_units(expected_b, units_a), units_b)
+    if isinstance(array2, Quantity):
+        expected_b = convert_units(attach_units(expected_b, units_a), units_b)
+    else:
+        expected_b = attach_units(expected_b, units_b)
 
     actual_a, actual_b = func(data_array1, data_array2)
 
-    assert_equal_with_units(expected_a, actual_a)
-    assert_equal_with_units(expected_b, actual_b)
+    assert_units_equal(expected_a, actual_a)
+    assert_allclose(expected_a, actual_a)
+    assert_units_equal(expected_b, actual_b)
+    assert_allclose(expected_b, actual_b)
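
A plain-numpy illustration (units omitted) of the alignment behaviour under test: `join="outer"` takes the union of the indexes and fills the holes with `fill_value`.

```python
import numpy as np
import xarray as xr

a = xr.DataArray([1.0, 2.0], coords={"x": [0, 1]}, dims="x")
b = xr.DataArray([3.0, 4.0], coords={"x": [1, 2]}, dims="x")

aligned_a, aligned_b = xr.align(a, b, join="outer", fill_value=-9999.0)
assert list(aligned_a.x.values) == [0, 1, 2]
assert aligned_a.values[-1] == -9999.0  # x=2 is missing from `a`
```
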
 
 
-@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -558,31 +537,37 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
 def test_align_dataset(fill_value, unit, variant, error, dtype):
     original_unit = unit_registry.m
 
-    variants = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)}
+    variants = {
+        "data": (unit, original_unit, original_unit),
+        "dims": (original_unit, unit, original_unit),
+        "coords": (original_unit, original_unit, unit),
+    }
     data_unit, dim_unit, coord_unit = variants.get(variant)
 
     array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit
     array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit
 
     x = np.arange(2) * original_unit
-    x_a1 = np.array([10, 5]) * original_unit
-    x_a2 = np.array([10, 5]) * coord_unit
 
     y1 = np.arange(5) * original_unit
     y2 = np.arange(2, 7) * dim_unit
+    y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit
+    y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit
 
-    ds1 = xr.Dataset(
-        data_vars={"a": (("x", "y"), array1)},
-        coords={"x": x, "x_a": ("x", x_a1), "y": y1},
-    )
-    ds2 = xr.Dataset(
-        data_vars={"a": (("x", "y"), array2)},
-        coords={"x": x, "x_a": ("x", x_a2), "y": y2},
-    )
+    coords1 = {"x": x, "y": y1}
+    coords2 = {"x": x, "y": y2}
+    if variant == "coords":
+        coords1["y_a"] = ("y", y_a1)
+        coords2["y_a"] = ("y", y_a2)
+
+    ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1)
+    ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2)
 
     fill_value = fill_value * data_unit
     func = function(xr.align, join="outer", fill_value=fill_value)
-    if error is not None:
+    if error is not None and not (
+        np.isnan(fill_value) and not isinstance(fill_value, Quantity)
+    ):
         with pytest.raises(error):
             func(ds1, ds2)
 
@@ -600,12 +585,17 @@ def test_align_dataset(fill_value, unit, variant, error, dtype):
         strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs
     )
     expected_a = attach_units(expected_a, units_a)
-    expected_b = convert_units(attach_units(expected_b, units_a), units_b)
+    if isinstance(array2, Quantity):
+        expected_b = convert_units(attach_units(expected_b, units_a), units_b)
+    else:
+        expected_b = attach_units(expected_b, units_b)
 
     actual_a, actual_b = func(ds1, ds2)
 
-    assert_equal_with_units(expected_a, actual_a)
-    assert_equal_with_units(expected_b, actual_b)
+    assert_units_equal(expected_a, actual_a)
+    assert_allclose(expected_a, actual_a)
+    assert_units_equal(expected_b, actual_b)
+    assert_allclose(expected_b, actual_b)
 
 
 def test_broadcast_dataarray(dtype):
@@ -615,28 +605,53 @@ def test_broadcast_dataarray(dtype):
     a = xr.DataArray(data=array1, dims="x")
     b = xr.DataArray(data=array2, dims="y")
 
-    expected_a, expected_b = tuple(
-        attach_units(elem, extract_units(a))
-        for elem in xr.broadcast(strip_units(a), strip_units(b))
-    )
+    units_a = extract_units(a)
+    units_b = extract_units(b)
+    expected_a, expected_b = xr.broadcast(strip_units(a), strip_units(b))
+    expected_a = attach_units(expected_a, units_a)
+    expected_b = convert_units(attach_units(expected_b, units_a), units_b)
+
     actual_a, actual_b = xr.broadcast(a, b)
 
-    assert_equal_with_units(expected_a, actual_a)
-    assert_equal_with_units(expected_b, actual_b)
+    assert_units_equal(expected_a, actual_a)
+    assert_identical(expected_a, actual_a)
+    assert_units_equal(expected_b, actual_b)
+    assert_identical(expected_b, actual_b)
 
 
 def test_broadcast_dataset(dtype):
     array1 = np.linspace(0, 10, 2) * unit_registry.Pa
     array2 = np.linspace(0, 10, 3) * unit_registry.Pa
 
-    ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)})
+    x1 = np.arange(2)
+    y1 = np.arange(3)
+
+    x2 = np.arange(2, 4)
+    y2 = np.arange(3, 6)
 
-    (expected,) = tuple(
-        attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds))
+    ds = xr.Dataset(
+        data_vars={"a": ("x", array1), "b": ("y", array2)}, coords={"x": x1, "y": y1}
+    )
+    other = xr.Dataset(
+        data_vars={
+            "a": ("x", array1.to(unit_registry.hPa)),
+            "b": ("y", array2.to(unit_registry.hPa)),
+        },
+        coords={"x": x2, "y": y2},
     )
-    (actual,) = xr.broadcast(ds)
 
-    assert_equal_with_units(expected, actual)
+    units_a = extract_units(ds)
+    units_b = extract_units(other)
+    expected_a, expected_b = xr.broadcast(strip_units(ds), strip_units(other))
+    expected_a = attach_units(expected_a, units_a)
+    expected_b = attach_units(expected_b, units_b)
+
+    actual_a, actual_b = xr.broadcast(ds, other)
+
+    assert_units_equal(expected_a, actual_a)
+    assert_identical(expected_a, actual_a)
+    assert_units_equal(expected_b, actual_b)
+    assert_identical(expected_b, actual_b)
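
Sketch (plain numpy instead of pint) of the broadcast semantics exercised above: every result carries the union of the input dimensions.

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(2.0))})
other = xr.Dataset({"b": ("y", np.arange(3.0))})

bc_ds, bc_other = xr.broadcast(ds, other)
assert bc_ds["a"].dims == ("x", "y")
assert bc_other["b"].dims == ("x", "y")
assert bc_ds["a"].shape == (2, 3)
```
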
 
 
 @pytest.mark.parametrize(
@@ -706,7 +721,8 @@ def test_combine_by_coords(variant, unit, error, dtype):
     )
     actual = xr.combine_by_coords([ds, other])
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 @pytest.mark.parametrize(
@@ -717,12 +733,7 @@ def test_combine_by_coords(variant, unit, error, dtype):
             unit_registry.dimensionless, DimensionalityError, id="dimensionless"
         ),
         pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
-        pytest.param(
-            unit_registry.mm,
-            None,
-            id="compatible_unit",
-            marks=pytest.mark.xfail(reason="wrong order of arguments to `where`"),
-        ),
+        pytest.param(unit_registry.mm, None, id="compatible_unit"),
         pytest.param(unit_registry.m, None, id="identical_unit"),
     ),
     ids=repr,
@@ -810,7 +821,8 @@ def test_combine_nested(variant, unit, error, dtype):
     )
     actual = func([[ds1, ds2], [ds3, ds4]])
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 @pytest.mark.parametrize(
@@ -862,7 +874,8 @@ def test_concat_dataarray(variant, unit, error, dtype):
     )
     actual = xr.concat([arr1, arr2], dim="x")
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 @pytest.mark.parametrize(
@@ -912,10 +925,10 @@ def test_concat_dataset(variant, unit, error, dtype):
     )
     actual = xr.concat([ds1, ds2], dim="x")
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
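
For orientation, the concat pattern these tests exercise, minus the units machinery:

```python
import numpy as np
import xarray as xr

ds1 = xr.Dataset({"a": ("x", np.zeros(3))}, coords={"x": [0, 1, 2]})
ds2 = xr.Dataset({"a": ("x", np.ones(3))}, coords={"x": [3, 4, 5]})

combined = xr.concat([ds1, ds2], dim="x")
assert combined.sizes["x"] == 6
```
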
 
 
-@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -948,64 +961,81 @@ def test_merge_dataarray(variant, unit, error, dtype):
     data_unit, dim_unit, coord_unit = variants.get(variant)
 
     array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit
+    x1 = np.arange(2) * original_unit
+    y1 = np.arange(3) * original_unit
+    u1 = np.linspace(10, 20, 2) * original_unit
+    v1 = np.linspace(10, 20, 3) * original_unit
+
     array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit
-    array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit
+    x2 = np.arange(2, 4) * dim_unit
+    z2 = np.arange(4) * original_unit
+    u2 = np.linspace(20, 30, 2) * coord_unit
+    w2 = np.linspace(10, 20, 4) * original_unit
 
-    x = np.arange(2) * original_unit
-    y = np.arange(3) * original_unit
-    z = np.arange(4) * original_unit
-    u = np.linspace(10, 20, 2) * original_unit
-    v = np.linspace(10, 20, 3) * original_unit
-    w = np.linspace(10, 20, 4) * original_unit
+    array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit
+    y3 = np.arange(3, 6) * dim_unit
+    z3 = np.arange(4, 8) * dim_unit
+    v3 = np.linspace(10, 20, 3) * coord_unit
+    w3 = np.linspace(10, 20, 4) * coord_unit
 
     arr1 = xr.DataArray(
         name="a",
         data=array1,
-        coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)},
+        coords={"x": x1, "y": y1, "u": ("x", u1), "v": ("y", v1)},
         dims=("x", "y"),
     )
     arr2 = xr.DataArray(
-        name="b",
+        name="a",
         data=array2,
-        coords={
-            "x": np.arange(2, 4) * dim_unit,
-            "z": z,
-            "u": ("x", np.linspace(20, 30, 2) * coord_unit),
-            "w": ("z", w),
-        },
+        coords={"x": x2, "z": z2, "u": ("x", u2), "w": ("z", w2)},
         dims=("x", "z"),
     )
     arr3 = xr.DataArray(
-        name="c",
+        name="a",
         data=array3,
-        coords={
-            "y": np.arange(3, 6) * dim_unit,
-            "z": np.arange(4, 8) * dim_unit,
-            "v": ("y", np.linspace(10, 20, 3) * coord_unit),
-            "w": ("z", np.linspace(10, 20, 4) * coord_unit),
-        },
+        coords={"y": y3, "z": z3, "v": ("y", v3), "w": ("z", w3)},
         dims=("y", "z"),
     )
 
-    func = function(xr.merge)
     if error is not None:
         with pytest.raises(error):
-            func([arr1, arr2, arr3])
+            xr.merge([arr1, arr2, arr3])
 
         return
 
-    units = {name: original_unit for name in list("abcuvwxyz")}
+    units = {name: original_unit for name in list("axyzuvw")}
+
     convert_and_strip = lambda arr: strip_units(convert_units(arr, units))
-    expected = attach_units(
-        func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]),
-        units,
+    expected_units = {
+        "a": original_unit,
+        "u": original_unit,
+        "v": original_unit,
+        "w": original_unit,
+        "x": original_unit,
+        "y": original_unit,
+        "z": original_unit,
+    }
+
+    expected = convert_units(
+        attach_units(
+            xr.merge(
+                [
+                    convert_and_strip(arr1),
+                    convert_and_strip(arr2),
+                    convert_and_strip(arr3),
+                ]
+            ),
+            units,
+        ),
+        expected_units,
     )
-    actual = func([arr1, arr2, arr3])
 
-    assert_equal_with_units(expected, actual)
+    actual = xr.merge([arr1, arr2, arr3])
+
+    assert_units_equal(expected, actual)
+    assert_allclose(expected, actual)
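
Unitless sketch of the merge pattern under test: arrays sharing the name "a" are merged into a single variable on the union of their coordinates, with the default `compat="no_conflicts"` letting each fill the other's gaps.

```python
import numpy as np
import xarray as xr

arr1 = xr.DataArray(
    np.ones((2, 2)), coords={"x": [0, 1], "y": [0, 1]}, dims=("x", "y"), name="a"
)
arr2 = xr.DataArray(
    np.ones((2, 2)), coords={"x": [0, 1], "y": [2, 3]}, dims=("x", "y"), name="a"
)

merged = xr.merge([arr1, arr2])
assert merged["a"].shape == (2, 4)  # union of the y coordinates
```
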
 
 
-@pytest.mark.xfail(reason="blocked by `reindex` / `where`")
 @pytest.mark.parametrize(
     "unit,error",
     (
@@ -1046,7 +1076,7 @@ def test_merge_dataset(variant, unit, error, dtype):
 
     ds1 = xr.Dataset(
         data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)},
-        coords={"x": x, "y": y, "z": ("x", z)},
+        coords={"x": x, "y": y, "u": ("x", z)},
     )
     ds2 = xr.Dataset(
         data_vars={
@@ -1056,18 +1086,18 @@ def test_merge_dataset(variant, unit, error, dtype):
         coords={
             "x": np.arange(3) * dim_unit,
             "y": np.arange(2, 4) * dim_unit,
-            "z": ("x", np.arange(-3, 0) * coord_unit),
+            "u": ("x", np.arange(-3, 0) * coord_unit),
         },
     )
     ds3 = xr.Dataset(
         data_vars={
-            "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit),
-            "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit),
+            "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit),
+            "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit),
         },
         coords={
             "x": np.arange(3, 6) * dim_unit,
             "y": np.arange(4, 6) * dim_unit,
-            "z": ("x", np.arange(3, 6) * coord_unit),
+            "u": ("x", np.arange(3, 6) * coord_unit),
         },
     )
 
@@ -1080,12 +1110,20 @@ def test_merge_dataset(variant, unit, error, dtype):
 
     units = extract_units(ds1)
     convert_and_strip = lambda ds: strip_units(convert_units(ds, units))
-    expected = attach_units(
-        func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units
+    expected_units = {name: original_unit for name in list("abxyzu")}
+    expected = convert_units(
+        attach_units(
+            func(
+                [convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]
+            ),
+            units,
+        ),
+        expected_units,
     )
     actual = func([ds1, ds2, ds3])
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_allclose(expected, actual)
 
 
 @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like))
@@ -1094,10 +1132,12 @@ def test_replication_dataarray(func, dtype):
     data_array = xr.DataArray(data=array, dims="x")
 
     numpy_func = getattr(np, func.__name__)
-    expected = xr.DataArray(data=numpy_func(array), dims="x")
+    units = extract_units(numpy_func(data_array))
+    expected = attach_units(func(data_array), units)
     actual = func(data_array)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like))
@@ -1114,12 +1154,13 @@ def test_replication_dataset(func, dtype):
     )
 
     numpy_func = getattr(np, func.__name__)
-    expected = ds.copy(
-        data={name: numpy_func(array.data) for name, array in ds.data_vars.items()}
-    )
+    units = extract_units(ds.map(numpy_func))
+    expected = attach_units(func(strip_units(ds)), units)
+
     actual = func(ds)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 @pytest.mark.xfail(
@@ -1158,7 +1199,8 @@ def test_replication_full_like_dataarray(unit, error, dtype):
     )
     actual = xr.full_like(data_array, fill_value=fill_value)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 @pytest.mark.xfail(
@@ -1208,7 +1250,8 @@ def test_replication_full_like_dataset(unit, error, dtype):
     )
     actual = xr.full_like(ds, fill_value=fill_value)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 @pytest.mark.parametrize(
@@ -1250,7 +1293,8 @@ def test_where_dataarray(fill_value, unit, error, dtype):
     )
     actual = xr.where(cond, x, fill_value)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
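
The three-argument `xr.where` form being tested, in its simplest shape:

```python
import numpy as np
import xarray as xr

x = xr.DataArray(np.arange(5.0), dims="d")

filled = xr.where(x < 3, x, -1.0)  # keep x where the condition holds
np.testing.assert_allclose(filled.values, [0.0, 1.0, 2.0, -1.0, -1.0])
```
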
 
 
 @pytest.mark.parametrize(
@@ -1294,7 +1338,8 @@ def test_where_dataset(fill_value, unit, error, dtype):
     )
     actual = xr.where(cond, ds, fill_value)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
 
 
 def test_dot_dataarray(dtype):
@@ -1315,7 +1360,8 @@ def test_dot_dataarray(dtype):
     )
     actual = xr.dot(data_array, other)
 
-    assert_equal_with_units(expected, actual)
+    assert_units_equal(expected, actual)
+    assert_identical(expected, actual)
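
Unitless sketch of the `xr.dot` contraction covered above:

```python
import numpy as np
import xarray as xr

x = xr.DataArray(np.arange(6.0).reshape(2, 3), dims=("a", "b"))
y = xr.DataArray(np.arange(3.0), dims="b")

result = xr.dot(x, y)  # contracts over the shared dimension "b"
assert result.dims == ("a",)
np.testing.assert_allclose(result.values, x.values @ y.values)
```
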
 
 
 def delete_attrs(*to_delete):
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 62fde920b1e..c600f7a77d0 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -9,7 +9,7 @@
 import pytz
 
 from xarray import Coordinate, Dataset, IndexVariable, Variable, set_options
-from xarray.core import dtypes, indexing
+from xarray.core import dtypes, duck_array_ops, indexing
 from xarray.core.common import full_like, ones_like, zeros_like
 from xarray.core.indexing import (
     BasicIndexer,
@@ -1511,14 +1511,16 @@ def test_reduce(self):
         with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"):
             v.mean(dim="x", allow_lazy=False)
 
+    @pytest.mark.parametrize("skipna", [True, False])
     @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
     @pytest.mark.parametrize(
         "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
     )
-    def test_quantile(self, q, axis, dim):
+    def test_quantile(self, q, axis, dim, skipna):
         v = Variable(["x", "y"], self.d)
-        actual = v.quantile(q, dim=dim)
-        expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis)
+        actual = v.quantile(q, dim=dim, skipna=skipna)
+        _percentile_func = np.nanpercentile if skipna else np.percentile
+        expected = _percentile_func(self.d, np.array(q) * 100, axis=axis)
         np.testing.assert_allclose(actual.values, expected)
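
A short sketch of the skipna semantics these parametrized tests cover (the `skipna` argument is what this patch adds to `quantile`):

```python
import numpy as np
import xarray as xr

da = xr.DataArray([1.0, 2.0, np.nan, 4.0], dims="x")

# skipna=False matches np.percentile: a NaN poisons the result.
assert np.isnan(da.quantile(0.5, skipna=False).item())
# skipna=True matches np.nanpercentile: median of [1, 2, 4].
assert da.quantile(0.5, skipna=True).item() == 2.0
```
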
 
     @requires_dask
@@ -1879,6 +1881,26 @@ def test_coarsen_2d(self):
         expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
         assert_equal(actual, expected)
 
+    # TODO: perhaps parametrize with @pytest.mark.parametrize("operation", [f for f in duck_array_ops])
+    def test_coarsen_keep_attrs(self, operation="mean"):
+        _attrs = {"units": "test", "long_name": "testing"}
+
+        test_func = getattr(duck_array_ops, operation, None)
+
+        # Test dropped attrs
+        with set_options(keep_attrs=False):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == {}
+
+        # Test kept attrs
+        with set_options(keep_attrs=True):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == _attrs
+
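
Hedged sketch of the keep_attrs behaviour the new test describes, shown through the DataArray interface (assumes the keep_attrs support added in this patch):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(10.0), dims="x", attrs={"units": "m"})

assert da.coarsen(x=5).mean().attrs == {}  # attrs dropped by default

with xr.set_options(keep_attrs=True):
    assert da.coarsen(x=5).mean().attrs == {"units": "m"}
```
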
 
 @requires_dask
 class TestVariableWithDask(VariableSubclassobjects):