diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e62c7e87d44..ca50856a25e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,6 +59,11 @@ Enhancements formatted datetimes. By `Alan Brammer `_. - Add ``.str`` accessor to DataArrays for string related manipulations. By `0x0L `_. +- Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims` + that allows ignoring errors if a passed label or dimension is not in the dataset + (:issue:`2994`). + By `Andrew Ross `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2746c32a8dc..4c3dcc2781a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1462,7 +1462,7 @@ def transpose(self, *dims, transpose_coords=None) -> 'DataArray': def T(self) -> 'DataArray': return self.transpose() - def drop(self, labels, dim=None): + def drop(self, labels, dim=None, *, errors='raise'): """Drop coordinates or index labels from this DataArray. Parameters @@ -1472,14 +1472,18 @@ def drop(self, labels, dim=None): dim : str, optional Dimension along which to drop index labels. By default (if ``dim is None``), drops coordinates rather than index labels. - + errors : {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError if + any of the coordinates or index labels passed are not + in the array. If 'ignore', any given labels that are in the + array are dropped and no error is raised. 
Returns ------- dropped : DataArray """ if utils.is_scalar(labels): labels = [labels] - ds = self._to_temp_dataset().drop(labels, dim) + ds = self._to_temp_dataset().drop(labels, dim, errors=errors) return self._from_temp_dataset(ds) def dropna(self, dim, how='any', thresh=None): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 026be5ba4b0..13a6a6ee9b2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2826,7 +2826,7 @@ def _assert_all_in_dataset(self, names, virtual_okay=False): raise ValueError('One or more of the specified variables ' 'cannot be found in this dataset') - def drop(self, labels, dim=None): + def drop(self, labels, dim=None, *, errors='raise'): """Drop variables or index labels from this dataset. Parameters @@ -2836,33 +2836,41 @@ def drop(self, labels, dim=None): dim : None or str, optional Dimension along which to drop index labels. By default (if ``dim is None``), drops variables rather than index labels. + errors : {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError if + any of the variable or index labels passed are not + in the dataset. If 'ignore', any given labels that are in the + dataset are dropped and no error is raised. 
Returns ------- dropped : Dataset """ + if errors not in ['raise', 'ignore']: + raise ValueError('errors must be either "raise" or "ignore"') if utils.is_scalar(labels): labels = [labels] if dim is None: - return self._drop_vars(labels) + return self._drop_vars(labels, errors=errors) else: try: index = self.indexes[dim] except KeyError: raise ValueError( 'dimension %r does not have coordinate labels' % dim) - new_index = index.drop(labels) + new_index = index.drop(labels, errors=errors) return self.loc[{dim: new_index}] - def _drop_vars(self, names): - self._assert_all_in_dataset(names) + def _drop_vars(self, names, errors='raise'): + if errors == 'raise': + self._assert_all_in_dataset(names) drop = set(names) variables = OrderedDict((k, v) for k, v in self._variables.items() if k not in drop) coord_names = set(k for k in self._coord_names if k in variables) return self._replace_vars_and_dims(variables, coord_names) - def drop_dims(self, drop_dims): + def drop_dims(self, drop_dims, *, errors='raise'): """Drop dimensions and associated variables from this dataset. Parameters @@ -2875,14 +2883,23 @@ def drop_dims(self, drop_dims): obj : Dataset The dataset without the given dimensions (or any variables containing those dimensions) + errors : {'raise', 'ignore'}, optional + If 'raise' (default), raises a ValueError if + any of the dimensions passed are not + in the dataset. If 'ignore', any given dimensions that are in the + dataset are dropped and no error is raised. 
""" + if errors not in ['raise', 'ignore']: + raise ValueError('errors must be either "raise" or "ignore"') + if utils.is_scalar(drop_dims): drop_dims = [drop_dims] - missing_dimensions = [d for d in drop_dims if d not in self.dims] - if missing_dimensions: - raise ValueError('Dataset does not contain the dimensions: %s' - % missing_dimensions) + if errors == 'raise': + missing_dimensions = [d for d in drop_dims if d not in self.dims] + if missing_dimensions: + raise ValueError('Dataset does not contain the dimensions: %s' + % missing_dimensions) drop_vars = set(k for k, v in self._variables.items() for d in v.dims if d in drop_dims) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fd9076e7f65..a8825055479 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1859,19 +1859,34 @@ def test_drop_coordinates(self): with pytest.raises(ValueError): arr.drop('not found') + actual = expected.drop('not found', errors='ignore') + assert_identical(actual, expected) + with raises_regex(ValueError, 'cannot be found'): arr.drop(None) + actual = expected.drop(None, errors='ignore') + assert_identical(actual, expected) + renamed = arr.rename('foo') with raises_regex(ValueError, 'cannot be found'): renamed.drop('foo') + actual = renamed.drop('foo', errors='ignore') + assert_identical(actual, renamed) + def test_drop_index_labels(self): arr = DataArray(np.random.randn(2, 3), coords={'y': [0, 1, 2]}, dims=['x', 'y']) actual = arr.drop([0, 1], dim='y') expected = arr[:, 2:] - assert_identical(expected, actual) + assert_identical(actual, expected) + + with raises_regex((KeyError, ValueError), 'not .* in axis'): + actual = arr.drop([0, 1, 3], dim='y') + + actual = arr.drop([0, 1, 3], dim='y', errors='ignore') + assert_identical(actual, expected) def test_dropna(self): x = np.random.randn(4, 4) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5aae56485ce..8cd129e35de 100644 --- 
a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1889,6 +1889,15 @@ def test_drop_variables(self): with raises_regex(ValueError, 'cannot be found'): data.drop('not_found_here') + actual = data.drop('not_found_here', errors='ignore') + assert_identical(data, actual) + + actual = data.drop(['not_found_here'], errors='ignore') + assert_identical(data, actual) + + actual = data.drop(['time', 'not_found_here'], errors='ignore') + assert_identical(expected, actual) + def test_drop_index_labels(self): data = Dataset({'A': (['x', 'y'], np.random.randn(2, 3)), 'x': ['a', 'b']}) @@ -1907,6 +1916,16 @@ def test_drop_index_labels(self): # not contained in axis data.drop(['c'], dim='x') + actual = data.drop(['c'], dim='x', errors='ignore') + assert_identical(data, actual) + + with pytest.raises(ValueError): + data.drop(['c'], dim='x', errors='wrong_value') + + actual = data.drop(['a', 'b', 'c'], 'x', errors='ignore') + expected = data.isel(x=slice(0, 0)) + assert_identical(expected, actual) + with raises_regex( ValueError, 'does not have coordinate labels'): data.drop(1, 'y') @@ -1931,6 +1950,22 @@ def test_drop_dims(self): with pytest.raises((ValueError, KeyError)): data.drop_dims('z') # not a dimension + with pytest.raises((ValueError, KeyError)): + data.drop_dims(None) + + actual = data.drop_dims('z', errors='ignore') + assert_identical(data, actual) + + actual = data.drop_dims(None, errors='ignore') + assert_identical(data, actual) + + with pytest.raises(ValueError): + actual = data.drop_dims('z', errors='wrong_value') + + actual = data.drop_dims(['x', 'y', 'z'], errors='ignore') + expected = data.drop(['A', 'B', 'x']) + assert_identical(expected, actual) + def test_copy(self): data = create_test_data() data.attrs['Test'] = [1, 2, 3]