From 3cdf40daefda2533905f6dda4a27395d051e37e1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 15:27:26 +0200 Subject: [PATCH 1/5] Bump pypa/gh-action-pypi-publish from 1.8.7 to 1.8.8 (#7994) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.7 to 1.8.8. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.7...v1.8.8) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 510572edcf7..1afafd4550d 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -88,7 +88,7 @@ jobs: path: dist - name: Publish package to TestPyPI if: github.event_name == 'push' - uses: pypa/gh-action-pypi-publish@v1.8.7 + uses: pypa/gh-action-pypi-publish@v1.8.8 with: repository_url: https://test.pypi.org/legacy/ verbose: true @@ -111,6 +111,6 @@ jobs: name: releases path: dist - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.8.7 + uses: pypa/gh-action-pypi-publish@v1.8.8 with: verbose: true From 647376d1d2db3210c142d8204c1c3a7431b85b9a Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Mon, 17 Jul 2023 15:59:39 +0100 Subject: [PATCH 2/5] Use variable name in all exceptions raised in `as_variable` (#7995) * Use variable name in all exceptions raised in `as_variable` This is more consistent with the other exceptions raised in this function, and helps to more quickly diagnose issues when e.g. 
creating datasets * Add `as_variable` change to `whats-new` --- doc/whats-new.rst | 2 ++ xarray/core/variable.py | 10 ++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 271e4ebdbb4..41dd734cd41 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -42,6 +42,8 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- :py:func:`as_variable` now consistently includes the variable name in any exceptions + raised. (:pull:`7995`). By `Peter Hill `_ .. _whats-new.2023.07.0: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9271d0c4dbd..d7c927bfd7a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -125,17 +125,15 @@ def as_variable(obj, name=None) -> Variable | IndexVariable: elif isinstance(obj, tuple): if isinstance(obj[1], DataArray): raise TypeError( - "Using a DataArray object to construct a variable is" + f"Variable {name!r}: Using a DataArray object to construct a variable is" " ambiguous, please extract the data using the .data property." ) try: obj = Variable(*obj) except (TypeError, ValueError) as error: - # use .format() instead of % because it handles tuples consistently raise error.__class__( - "Could not convert tuple of form " - "(dims, data[, attrs, encoding]): " - "{} to Variable.".format(obj) + f"Variable {name!r}: Could not convert tuple of form " + f"(dims, data[, attrs, encoding]): {obj} to Variable." 
) elif utils.is_scalar(obj): obj = Variable([], obj) @@ -154,7 +152,7 @@ def as_variable(obj, name=None) -> Variable | IndexVariable: obj = Variable(name, data, fastpath=True) else: raise TypeError( - "unable to convert object into a variable without an " + f"Variable {name!r}: unable to convert object into a variable without an " f"explicit list of dimensions: {obj!r}" ) From 7234603781768728b3fd544cdcaca991466d4a44 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Jul 2023 01:23:21 +0200 Subject: [PATCH 3/5] Add documentation on custom indexes (#6975) * improve Index base class type annotations Use T_Index generic when possible. * import Index base class in Xarray root namespace * import IndexSelResult into Xarray root namespace * wip: Index API docstrings * wip: doc: add how to add custom index section * add Index method docstrings * add user guide on how to create a custom index * review comments + tweaks * update what's new * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply uncontroversial suggestions from Deepak's code review Co-authored-by: Deepak Cherian * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply more suggestions from code review Co-authored-by: Deepak Cherian * Link to source code for PandasIndex and PandasMultiIndex --------- Co-authored-by: Thomas Nicholas Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- doc/api-hidden.rst | 15 + doc/api.rst | 3 +- doc/internals/how-to-create-custom-index.rst | 233 +++++++++++++ doc/internals/index.rst | 1 + doc/whats-new.rst | 4 + xarray/__init__.py | 4 + xarray/core/indexes.py | 329 +++++++++++++++++-- 7 files changed, 565 insertions(+), 24 deletions(-) create mode 100644 doc/internals/how-to-create-custom-index.rst diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 5d825be2e08..1a2b1d11747 100644 --- 
a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -451,6 +451,21 @@ CFTimeIndex.values CFTimeIndex.year + Index.from_variables + Index.concat + Index.stack + Index.unstack + Index.create_variables + Index.to_pandas_index + Index.isel + Index.sel + Index.join + Index.reindex_like + Index.equals + Index.roll + Index.rename + Index.copy + backends.NetCDF4DataStore.close backends.NetCDF4DataStore.encode backends.NetCDF4DataStore.encode_attribute diff --git a/doc/api.rst b/doc/api.rst index 34d6558ed55..9bac1c40af8 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1090,7 +1090,8 @@ Advanced API Variable IndexVariable as_variable - indexes.Index + Index + IndexSelResult Context register_dataset_accessor register_dataarray_accessor diff --git a/doc/internals/how-to-create-custom-index.rst b/doc/internals/how-to-create-custom-index.rst new file mode 100644 index 00000000000..93805229db1 --- /dev/null +++ b/doc/internals/how-to-create-custom-index.rst @@ -0,0 +1,233 @@ +.. currentmodule:: xarray + +How to create a custom index +============================ + +.. warning:: + + This feature is highly experimental. Support for custom indexes has been + introduced in v2022.06.0 and is still incomplete. API is subject to change + without deprecation notice. However we encourage you to experiment and report issues that arise. + +Xarray's built-in support for label-based indexing (e.g. `ds.sel(latitude=40, method="nearest")`) and alignment operations +relies on :py:class:`pandas.Index` objects. 
Pandas Indexes are powerful and suitable for many +applications but also have some limitations: + +- they only work with 1-dimensional coordinates where explicit labels + are fully loaded in memory +- they are hard to reuse with irregular data for which there exist more + efficient, tree-based structures to perform data selection +- they don't support extra metadata that may be required for indexing and + alignment (e.g., a coordinate reference system) + +Fortunately, Xarray now allows extending this functionality with custom indexes, +which can be implemented in 3rd-party libraries. + +The Index base class +-------------------- + +Every Xarray index must inherit from the :py:class:`Index` base class. This is, for +example, the case of Xarray's built-in ``PandasIndex`` and ``PandasMultiIndex`` +subclasses, which wrap :py:class:`pandas.Index` and +:py:class:`pandas.MultiIndex` respectively. + +The ``Index`` API closely follows the :py:class:`Dataset` and +:py:class:`DataArray` API, e.g., for an index to support :py:meth:`DataArray.sel` it needs to +implement :py:meth:`Index.sel`, to support :py:meth:`DataArray.stack` and :py:meth:`DataArray.unstack` it +needs to implement :py:meth:`Index.stack` and :py:meth:`Index.unstack`, etc. + +Some guidelines and examples are given below. More details can be found in the +documented :py:class:`Index` API. + +Minimal requirements +-------------------- + +Every index must at least implement the :py:meth:`Index.from_variables` class +method, which is used by Xarray to build a new index instance from one or more +existing coordinates in a Dataset or DataArray. + +Since any collection of coordinates can be passed to that method (i.e., the +number, order and dimensions of the coordinates are all arbitrary), it is the +responsibility of the index to check the consistency and validity of those input +coordinates. 
+ +For example, :py:class:`~xarray.core.indexes.PandasIndex` accepts only one coordinate and +:py:class:`~xarray.core.indexes.PandasMultiIndex` accepts one or more 1-dimensional coordinates that must all +share the same dimension. Other, custom indexes need not have the same +constraints, e.g., + +- a georeferenced raster index which only accepts two 1-d coordinates with + distinct dimensions +- a staggered grid index which takes coordinates with different dimension name + suffixes (e.g., "_c" and "_l" for center and left) + +Optional requirements +--------------------- + +Pretty much everything else is optional. Depending on the method, in the absence +of a (re)implementation, an index will either raise a `NotImplementedError` +or won't do anything specific (just drop, pass or copy itself +from/to the resulting Dataset or DataArray). + +For example, you can just skip re-implementing :py:meth:`Index.rename` if there +is no internal attribute or object to rename according to the new desired +coordinate or dimension names. In the case of ``PandasIndex``, we rename the +underlying ``pandas.Index`` object and/or update the ``PandasIndex.dim`` +attribute since the associated dimension name has been changed. + +Wrap index data as coordinate data +---------------------------------- + +In some cases it is possible to reuse the index's underlying object or structure +as coordinate data and hence avoid data duplication. + +For ``PandasIndex`` and ``PandasMultiIndex``, we +leverage the fact that ``pandas.Index`` objects expose some array-like API. In +Xarray we use some wrappers around those underlying objects as a thin +compatibility layer to preserve dtypes, handle explicit and n-dimensional +indexing, etc. + +Other structures like tree-based indexes (e.g., kd-tree) may differ too much +from arrays to reuse it as coordinate data. + +If the index data can be reused as coordinate data, the ``Index`` subclass +should implement :py:meth:`Index.create_variables`. 
This method accepts a +dictionary of variable names as keys and :py:class:`Variable` objects as values (used for propagating +variable metadata) and should return a dictionary of new :py:class:`Variable` or +:py:class:`IndexVariable` objects. + +Data selection +-------------- + +For an index to support label-based selection, it needs to at least implement +:py:meth:`Index.sel`. This method accepts a dictionary of labels where the keys +are coordinate names (already filtered for the current index) and the values can +be pretty much anything (e.g., a slice, a tuple, a list, a numpy array, a +:py:class:`Variable` or a :py:class:`DataArray`). It is the responsibility of +the index to properly handle those input labels. + +:py:meth:`Index.sel` must return an instance of :py:class:`IndexSelResult`. The +latter is a small data class that holds positional indexers (indices) and that +may also hold new variables, new indexes, names of variables or indexes to drop, +names of dimensions to rename, etc. For example, this is useful in the case of +``PandasMultiIndex`` as it allows Xarray to convert it into a single ``PandasIndex`` +when only one level remains after the selection. + +The :py:class:`IndexSelResult` class is also used to merge results from label-based +selection performed by different indexes. Note that it is now possible to have +two distinct indexes for two 1-d coordinates sharing the same dimension, but it +is not currently possible to use those two indexes in the same call to +:py:meth:`Dataset.sel`. + +Optionally, the index may also implement :py:meth:`Index.isel`. In the case of +``PandasIndex`` we use it to create a new index object by just indexing the +underlying ``pandas.Index`` object. In other cases this may not be possible, +e.g., a kd-tree object may not be easily indexed. If ``Index.isel()`` is not +implemented, the index is just dropped in the DataArray or Dataset resulting +from the selection. 
+ +Alignment +--------- + +For an index to support alignment, it needs to implement: + +- :py:meth:`Index.equals`, which compares the index with another index and + returns either ``True`` or ``False`` +- :py:meth:`Index.join`, which combines the index with another index and returns + a new Index object +- :py:meth:`Index.reindex_like`, which queries the index with another index and + returns positional indexers that are used to re-index Dataset or DataArray + variables along one or more dimensions + +Xarray ensures that those three methods are called with an index of the same +type as argument. + +Meta-indexes +------------ + +Nothing prevents writing a custom Xarray index that itself encapsulates other +Xarray index(es). We call such an index a "meta-index". + +Here is a small example of a meta-index for geospatial, raster datasets (i.e., +regularly spaced 2-dimensional data) that internally relies on two +``PandasIndex`` instances for the x and y dimensions respectively: + +.. code-block:: python + + from xarray import Index + from xarray.core.indexes import PandasIndex + from xarray.core.indexing import merge_sel_results + + + class RasterIndex(Index): + def __init__(self, xy_indexes): + assert len(xy_indexes) == 2 + + # must have two distinct dimensions + dim = [idx.dim for idx in xy_indexes.values()] + assert dim[0] != dim[1] + + self._xy_indexes = xy_indexes + + @classmethod + def from_variables(cls, variables): + assert len(variables) == 2 + + xy_indexes = { + k: PandasIndex.from_variables({k: v}) for k, v in variables.items() + } + + return cls(xy_indexes) + + def create_variables(self, variables): + idx_variables = {} + + for index in self._xy_indexes.values(): + idx_variables.update(index.create_variables(variables)) + + return idx_variables + + def sel(self, labels): + results = [] + + for k, index in self._xy_indexes.items(): + if k in labels: + results.append(index.sel({k: labels[k]})) + + return merge_sel_results(results) + + +This basic index only 
supports label-based selection. Providing a full-featured +index by implementing the other ``Index`` methods should be pretty +straightforward for this example, though. + +This example is also not very useful unless we add some extra functionality on +top of the two encapsulated ``PandasIndex`` objects, such as a coordinate +reference system. + +How to use a custom index +------------------------- + +You can use :py:meth:`Dataset.set_xindex` or :py:meth:`DataArray.set_xindex` to assign a +custom index to a Dataset or DataArray, e.g., using the ``RasterIndex`` above: + +.. code-block:: python + + import numpy as np + import xarray as xr + + da = xr.DataArray( + np.random.uniform(size=(100, 50)), + coords={"x": ("x", np.arange(50)), "y": ("y", np.arange(100))}, + dims=("y", "x"), + ) + + # Xarray creates default indexes for the 'x' and 'y' coordinates + # we first need to explicitly drop them + da = da.drop_indexes(["x", "y"]) + + # Build a RasterIndex from the 'x' and 'y' coordinates + da_raster = da.set_xindex(["x", "y"], RasterIndex) + + # RasterIndex now takes care of label-based selection + selected = da_raster.sel(x=10, y=slice(20, 50)) diff --git a/doc/internals/index.rst b/doc/internals/index.rst index 666f8fd2343..7e13f0cfe95 100644 --- a/doc/internals/index.rst +++ b/doc/internals/index.rst @@ -25,3 +25,4 @@ The pages in this section are intended for: extending-xarray zarr-encoding-spec how-to-add-new-backend + how-to-create-custom-index diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 41dd734cd41..326c41fbe49 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -795,6 +795,10 @@ Bug fixes Documentation ~~~~~~~~~~~~~ + +- Add docstrings for the :py:class:`Index` base class and add some documentation on how to + create custom, Xarray-compatible indexes (:pull:`6975`) + By `Benoît Bovy `_. - Update merge docstrings. (:issue:`6935`, :pull:`7033`) By `Zach Moon `_. - Raise a more informative error when trying to open a non-existent zarr store. 
(:issue:`6484`, :pull:`7060`) diff --git a/xarray/__init__.py b/xarray/__init__.py index 75a58053663..87b897cf1ea 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -32,6 +32,8 @@ register_dataarray_accessor, register_dataset_accessor, ) +from xarray.core.indexes import Index +from xarray.core.indexing import IndexSelResult from xarray.core.merge import Context, MergeError, merge from xarray.core.options import get_options, set_options from xarray.core.parallel import map_blocks @@ -100,6 +102,8 @@ "Coordinate", "DataArray", "Dataset", + "Index", + "IndexSelResult", "IndexVariable", "Variable", # Exceptions diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 9ee9bc374d4..bfa8b9da07a 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -24,26 +24,66 @@ ) if TYPE_CHECKING: - from xarray.core.types import ErrorOptions, T_Index + from xarray.core.types import ErrorOptions, JoinOptions, T_Index from xarray.core.variable import Variable + IndexVars = dict[Any, "Variable"] class Index: - """Base class inherited by all xarray-compatible indexes. + """ + Base class inherited by all xarray-compatible indexes. - Do not use this class directly for creating index objects. + Do not use this class directly for creating index objects. Xarray indexes + are created exclusively from subclasses of ``Index``, mostly via Xarray's + public API like ``Dataset.set_xindex``. + + Every subclass must at least implement :py:meth:`Index.from_variables`. The + (re)implementation of the other methods of this base class is optional but + mostly required in order to support operations relying on indexes such as + label-based selection or alignment. + + The ``Index`` API closely follows the :py:meth:`Dataset` and + :py:meth:`DataArray` API, e.g., for an index to support ``.sel()`` it needs + to implement :py:meth:`Index.sel`, to support ``.stack()`` and + ``.unstack()`` it needs to implement :py:meth:`Index.stack` and + :py:meth:`Index.unstack`, etc. 
+ When a method is not (re)implemented, depending on the case the + corresponding operation on a :py:class:`Dataset` or :py:class:`DataArray` + either will raise a ``NotImplementedError`` or will simply drop/pass/copy + the index from/to the result. + + Do not use this class directly for creating index objects. """ @classmethod def from_variables( - cls, + cls: type[T_Index], variables: Mapping[Any, Variable], *, options: Mapping[str, Any], - ) -> Index: + ) -> T_Index: + """Create a new index object from one or more coordinate variables. + + This factory method must be implemented in all subclasses of Index. + + The coordinate variables may be passed here in an arbitrary number and + order and each with arbitrary dimensions. It is the responsibility of + the index to check the consistency and validity of these coordinates. + + Parameters + ---------- + variables : dict-like + Mapping of :py:class:`Variable` objects holding the coordinate labels + to index. + + Returns + ------- + index : Index + A new Index object. + """ raise NotImplementedError() @classmethod @@ -53,20 +93,102 @@ def concat( dim: Hashable, positions: Iterable[Iterable[int]] | None = None, ) -> T_Index: + """Create a new index by concatenating one or more indexes of the same + type. + + Implementation is optional but required in order to support + ``concat``. Otherwise it will raise an error if the index needs to be + updated during the operation. + + Parameters + ---------- + indexes : sequence of Index objects + Index objects to concatenate together. All objects must be of the + same type. + dim : Hashable + Name of the dimension to concatenate along. + positions : None or list of integer arrays, optional + List of integer arrays which specifies the integer positions to which + to assign each dataset along the concatenated dimension. If not + supplied, objects are concatenated in the provided order. + + Returns + ------- + index : Index + A new Index object. 
+ """ raise NotImplementedError() @classmethod - def stack(cls, variables: Mapping[Any, Variable], dim: Hashable) -> Index: + def stack( + cls: type[T_Index], variables: Mapping[Any, Variable], dim: Hashable + ) -> T_Index: + """Create a new index by stacking coordinate variables into a single new + dimension. + + Implementation is optional but required in order to support ``stack``. + Otherwise it will raise an error when trying to pass the Index subclass + as argument to :py:meth:`Dataset.stack`. + + Parameters + ---------- + variables : dict-like + Mapping of :py:class:`Variable` objects to stack together. + dim : Hashable + Name of the new, stacked dimension. + + Returns + ------- + index + A new Index object. + """ raise NotImplementedError( f"{cls!r} cannot be used for creating an index of stacked coordinates" ) def unstack(self) -> tuple[dict[Hashable, Index], pd.MultiIndex]: + """Unstack a (multi-)index into multiple (single) indexes. + + Implementation is optional but required in order to support unstacking + the coordinates from which this index has been built. + + Returns + ------- + indexes : tuple + A 2-length tuple where the 1st item is a dictionary of unstacked + Index objects and the 2nd item is a :py:class:`pandas.MultiIndex` + object used to unstack unindexed coordinate variables or data + variables. + """ raise NotImplementedError() def create_variables( self, variables: Mapping[Any, Variable] | None = None ) -> IndexVars: + """Maybe create new coordinate variables from this index. + + This method is useful if the index data can be reused as coordinate + variable data. It is often the case when the underlying index structure + has an array-like interface, like :py:class:`pandas.Index` objects. + + The variables given as argument (if any) are either returned as-is + (default behavior) or can be used to copy their metadata (attributes and + encoding) into the new returned coordinate variables. 
+ + Note: the input variables may or may not have been filtered for this + index. + + Parameters + ---------- + variables : dict-like, optional + Mapping of :py:class:`Variable` objects. + + Returns + ------- + index_variables : dict-like + Dictionary of :py:class:`Variable` or :py:class:`IndexVariable` + objects. + """ if variables is not None: # pass through return dict(**variables) @@ -74,51 +196,212 @@ def create_variables( return {} def to_pandas_index(self) -> pd.Index: - """Cast this xarray index to a pandas.Index object or raise a TypeError - if this is not supported. + """Cast this xarray index to a pandas.Index object or raise a + ``TypeError`` if this is not supported. - This method is used by all xarray operations that expect/require a - pandas.Index object. + This method is used by all xarray operations that still rely on + pandas.Index objects. + By default it raises a ``TypeError``, unless it is re-implemented in + subclasses of Index. """ raise TypeError(f"{self!r} cannot be cast to a pandas.Index object") def isel( - self, indexers: Mapping[Any, int | slice | np.ndarray | Variable] - ) -> Index | None: + self: T_Index, indexers: Mapping[Any, int | slice | np.ndarray | Variable] + ) -> T_Index | None: + """Maybe returns a new index from the current index itself indexed by + positional indexers. + + This method should be re-implemented in subclasses of Index if the + wrapped index structure supports indexing operations. For example, + indexing a ``pandas.Index`` is pretty straightforward as it behaves very + much like an array. By contrast, it may be harder doing so for a + structure like a kd-tree that differs much from a simple array. + + If not re-implemented in subclasses of Index, this method returns + ``None``, i.e., calling :py:meth:`Dataset.isel` will either drop the + index in the resulting dataset or pass it unchanged if its corresponding + coordinate(s) are not indexed. 
+ + Parameters + ---------- + indexers : dict + A dictionary of positional indexers as passed from + :py:meth:`Dataset.isel` and where the entries have been filtered + for the current index. + + Returns + ------- + maybe_index : Index + A new Index object or ``None``. + """ return None def sel(self, labels: dict[Any, Any]) -> IndexSelResult: + """Query the index with arbitrary coordinate label indexers. + + Implementation is optional but required in order to support label-based + selection. Otherwise it will raise an error when trying to call + :py:meth:`Dataset.sel` with labels for this index's coordinates. + + Coordinate label indexers can be of many kinds, e.g., scalar, list, + tuple, array-like, slice, :py:class:`Variable`, :py:class:`DataArray`, etc. + It is the responsibility of the index to handle those indexers properly. + + Parameters + ---------- + labels : dict + A dictionary of coordinate label indexers passed from + :py:meth:`Dataset.sel` and where the entries have been filtered + for the current index. + + Returns + ------- + sel_results : :py:class:`IndexSelResult` + An index query result object that contains dimension positional indexers. + It may also contain new indexes, coordinate variables, etc. + """ raise NotImplementedError(f"{self!r} doesn't support label-based selection") - def join(self: T_Index, other: T_Index, how: str = "inner") -> T_Index: + def join(self: T_Index, other: T_Index, how: JoinOptions = "inner") -> T_Index: + """Return a new index from the combination of this index with another + index of the same type. + + Implementation is optional but required in order to support alignment. + + Parameters + ---------- + other : Index + The other Index object to combine with this index. + how : str, optional + Method for joining the two indexes (see :py:func:`~xarray.align`). + + Returns + ------- + joined : Index + A new Index object. 
+ """ raise NotImplementedError( f"{self!r} doesn't support alignment with inner/outer join method" ) def reindex_like(self: T_Index, other: T_Index) -> dict[Hashable, Any]: + """Query the index with another index of the same type. + + Implementation is optional but required in order to support alignment. + + Parameters + ---------- + other : Index + The other Index object used to query this index. + + Returns + ------- + dim_positional_indexers : dict + A dictionary where keys are dimension names and values are positional + indexers. + """ raise NotImplementedError(f"{self!r} doesn't support re-indexing labels") - def equals(self, other): # pragma: no cover + def equals(self: T_Index, other: T_Index) -> bool: + """Compare this index with another index of the same type. + + Implementation is optional but required in order to support alignment. + + Parameters + ---------- + other : Index + The other Index object to compare with this object. + + Returns + ------- + is_equal : bool + ``True`` if the indexes are equal, ``False`` otherwise. + """ raise NotImplementedError() - def roll(self, shifts: Mapping[Any, int]) -> Index | None: + def roll(self: T_Index, shifts: Mapping[Any, int]) -> T_Index | None: + """Roll this index by an offset along one or more dimensions. + + This method can be re-implemented in subclasses of Index, e.g., when the + index can be itself indexed. + + If not re-implemented, this method returns ``None``, i.e., calling + :py:meth:`Dataset.roll` will either drop the index in the resulting + dataset or pass it unchanged if its corresponding coordinate(s) are not + rolled. + + Parameters + ---------- + shifts : mapping of hashable to int + A dict with keys matching dimensions and values given + by integers to rotate each of the given dimensions, as passed + from :py:meth:`Dataset.roll`. + + Returns + ------- + rolled : Index + A new index with rolled data. 
+ """ return None def rename( - self, name_dict: Mapping[Any, Hashable], dims_dict: Mapping[Any, Hashable] - ) -> Index: - return self + self: T_Index, + name_dict: Mapping[Any, Hashable], + dims_dict: Mapping[Any, Hashable], + ) -> T_Index: + """Maybe update the index with new coordinate and dimension names. - def __copy__(self) -> Index: - return self._copy(deep=False) + This method should be re-implemented in subclasses of Index if it has + attributes that depend on coordinate or dimension names. - def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Index: - return self._copy(deep=True, memo=memo) + By default (if not re-implemented), it returns the index itself. + + Warning: the input names are not filtered for this method, they may + correspond to any variable or dimension of a Dataset or a DataArray. + + Parameters + ---------- + name_dict : dict-like + Mapping of current variable or coordinate names to the desired names, + as passed from :py:meth:`Dataset.rename_vars`. + dims_dict : dict-like + Mapping of current dimension names to the desired names, as passed + from :py:meth:`Dataset.rename_dims`. + + Returns + ------- + renamed : Index + Index with renamed attributes. + """ + return self def copy(self: T_Index, deep: bool = True) -> T_Index: + """Return a (deep) copy of this index. + + Implementation in subclasses of Index is optional. The base class + implements the default (deep) copy semantics. + + Parameters + ---------- + deep : bool, optional + If true (default), a copy of the internal structures + (e.g., wrapped index) is returned with the new object. + + Returns + ------- + index : Index + A new Index object. 
+ """ return self._copy(deep=deep) + def __copy__(self: T_Index) -> T_Index: + return self.copy(deep=False) + + def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Index: + return self._copy(deep=True, memo=memo) + def _copy( self: T_Index, deep: bool = True, memo: dict[int, Any] | None = None ) -> T_Index: @@ -131,7 +414,7 @@ def _copy( copied.__dict__.update(self.__dict__) return copied - def __getitem__(self, indexer: Any): + def __getitem__(self: T_Index, indexer: Any) -> T_Index: raise NotImplementedError() def _repr_inline_(self, max_width): From b88d8419ddd87c4f1c3217841a25f17b5ef68c82 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 17:23:46 -0600 Subject: [PATCH 4/5] [pre-commit.ci] pre-commit autoupdate (#7997) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.277 → v0.0.278](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.277...v0.0.278) - [github.com/psf/black: 23.3.0 → 23.7.0](https://github.com/psf/black/compare/23.3.0...23.7.0) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +++--- xarray/core/common.py | 8 ++------ xarray/core/dataarray.py | 6 +----- xarray/core/formatting.py | 12 +++--------- xarray/core/missing.py | 4 +--- xarray/core/parallel.py | 4 +--- xarray/core/variable.py | 8 ++------ xarray/testing.py | 4 +--- 8 files changed, 14 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02554cc7a45..eca14fe0631 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,13 +16,13 @@ repos: files: ^xarray/ - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff 
version. - rev: 'v0.0.277' + rev: 'v0.0.278' hooks: - id: ruff args: ["--fix"] # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.7.0 hooks: - id: black-jupyter - repo: https://github.com/keewis/blackdoc @@ -30,7 +30,7 @@ repos: hooks: - id: blackdoc exclude: "generate_aggregations.py" - additional_dependencies: ["black==23.3.0"] + additional_dependencies: ["black==23.7.0"] - id: blackdoc-autoupdate-black - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.4.1 diff --git a/xarray/core/common.py b/xarray/core/common.py index 5dd4c4dbd96..d54c259ae2c 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -307,9 +307,7 @@ def __setattr__(self, name: str, value: Any) -> None: except AttributeError as e: # Don't accidentally shadow custom AttributeErrors, e.g. # DataArray.dims.setter - if str(e) != "{!r} object has no attribute {!r}".format( - type(self).__name__, name - ): + if str(e) != f"{type(self).__name__!r} object has no attribute {name!r}": raise raise AttributeError( f"cannot set attribute {name!r} on a {type(self).__name__!r} object. 
Use __setitem__ style" @@ -1293,9 +1291,7 @@ def isin(self: T_DataWithCoords, test_elements: Any) -> T_DataWithCoords: if isinstance(test_elements, Dataset): raise TypeError( - "isin() argument must be convertible to an array: {}".format( - test_elements - ) + f"isin() argument must be convertible to an array: {test_elements}" ) elif isinstance(test_elements, (Variable, DataArray)): # need to explicitly pull out data to support dask arrays as the diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 9635d678c36..bbaf79e23ba 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4676,11 +4676,7 @@ def _title_for_slice(self, truncate: int = 50) -> str: for dim, coord in self.coords.items(): if coord.size == 1: one_dims.append( - "{dim} = {v}{unit}".format( - dim=dim, - v=format_item(coord.values), - unit=_get_units_from_attrs(coord), - ) + f"{dim} = {format_item(coord.values)}{_get_units_from_attrs(coord)}" ) title = ", ".join(one_dims) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 7f93706c74c..06f84c3eee1 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -697,9 +697,7 @@ def dataset_repr(ds): def diff_dim_summary(a, b): if a.dims != b.dims: - return "Differing dimensions:\n ({}) != ({})".format( - dim_summary(a), dim_summary(b) - ) + return f"Differing dimensions:\n ({dim_summary(a)}) != ({dim_summary(b)})" else: return "" @@ -826,9 +824,7 @@ def _compat_to_str(compat): def diff_array_repr(a, b, compat): # used for DataArray, Variable and IndexVariable summary = [ - "Left and right {} objects are not {}".format( - type(a).__name__, _compat_to_str(compat) - ) + f"Left and right {type(a).__name__} objects are not {_compat_to_str(compat)}" ] summary.append(diff_dim_summary(a, b)) @@ -859,9 +855,7 @@ def diff_array_repr(a, b, compat): def diff_dataset_repr(a, b, compat): summary = [ - "Left and right {} objects are not {}".format( - type(a).__name__, _compat_to_str(compat) - ) + f"Left 
and right {type(a).__name__} objects are not {_compat_to_str(compat)}" ] col_width = _calculate_col_width(set(list(a.variables) + list(b.variables))) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 7dbaf47b5cc..c6efaebc04c 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -66,9 +66,7 @@ def __call__(self, x): return self.f(x, **self.call_kwargs) def __repr__(self): - return "{type}: method={method}".format( - type=self.__class__.__name__, method=self.method - ) + return f"{self.__class__.__name__}: method={self.method}" class NumpyInterpolator(BaseInterpolator): diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 2f8612c5a9b..07c3c606bf2 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -406,9 +406,7 @@ def _wrapper( new_layers: collections.defaultdict[str, dict[Any, Any]] = collections.defaultdict( dict ) - gname = "{}-{}".format( - dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs) - ) + gname = f"{dask.utils.funcname(func)}-{dask.base.tokenize(npargs[0], args, kwargs)}" # map dims to list of chunk indexes ichunk = {dim: range(len(chunks_v)) for dim, chunks_v in input_chunks.items()} diff --git a/xarray/core/variable.py b/xarray/core/variable.py index d7c927bfd7a..720701be6f0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1083,9 +1083,7 @@ def _copy( ndata = as_compatible_data(data) if self.shape != ndata.shape: raise ValueError( - "Data shape {} must match shape of object {}".format( - ndata.shape, self.shape - ) + f"Data shape {ndata.shape} must match shape of object {self.shape}" ) attrs = copy.deepcopy(self._attrs, memo) if deep else copy.copy(self._attrs) @@ -3044,9 +3042,7 @@ def copy(self, deep: bool = True, data: ArrayLike | None = None): ndata = as_compatible_data(data) if self.shape != ndata.shape: raise ValueError( - "Data shape {} must match shape of object {}".format( - ndata.shape, self.shape - ) + f"Data shape {ndata.shape} must 
match shape of object {self.shape}" ) attrs = copy.deepcopy(self._attrs) if deep else copy.copy(self._attrs) diff --git a/xarray/testing.py b/xarray/testing.py index b6a88135ee1..dfd84851ac1 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -403,7 +403,5 @@ def _assert_internal_invariants( ) else: raise TypeError( - "{} is not a supported type for xarray invariant checks".format( - type(xarray_obj) - ) + f"{type(xarray_obj)} is not a supported type for xarray invariant checks" ) From 90d5cd4fab5aeb1a742da64b57ed5bece4e38ca8 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 17 Jul 2023 23:31:27 -0400 Subject: [PATCH 5/5] Move whats-new entry --- doc/whats-new.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 326c41fbe49..682b8cf3066 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,9 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Add docstrings for the :py:class:`Index` base class and add some documentation on how to + create custom, Xarray-compatible indexes (:pull:`6975`) + By `Benoît Bovy `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -796,9 +799,6 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Add docstrings for the :py:class:`Index` base class and add some documentation on how to - create custom, Xarray-compatible indexes (:pull:`6975`) - By `Benoît Bovy `_. - Update merge docstrings. (:issue:`6935`, :pull:`7033`) By `Zach Moon `_. - Raise a more informative error when trying to open a non-existent zarr store. (:issue:`6484`, :pull:`7060`)