diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9e59fdc5b35..9c16fb74a7b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,8 @@ v0.17.1 (unreleased) New Features ~~~~~~~~~~~~ +- Allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`). + By `Justus Magin `_. - Support for `dask.graph_manipulation `_ (requires dask >=2021.3) By `Guido Imperiale `_ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a4001c747bb..db45157e7c1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3917,6 +3917,7 @@ def merge( compat: str = "no_conflicts", join: str = "outer", fill_value: Any = dtypes.NA, + combine_attrs: str = "override", ) -> "Dataset": """Merge the arrays of two datasets into a single dataset. @@ -3945,7 +3946,6 @@ def merge( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. - join : {"outer", "inner", "left", "right", "exact"}, optional Method for joining ``self`` and ``other`` along shared dimensions: @@ -3957,6 +3957,18 @@ def merge( fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill values. + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "override" + String indicating how to combine attrs of the objects being merged: + + - "drop": empty attrs on returned Dataset. + - "identical": all attrs must be the same on every object. + - "no_conflicts": attrs from all objects are combined, any that have + the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. + - "override": skip comparing and copy attrs from the first dataset to + the result. Returns ------- @@ -3976,6 +3988,7 @@ def merge( compat=compat, join=join, fill_value=fill_value, + combine_attrs=combine_attrs, ) return self._replace(**merge_result._asdict()) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 14beeff3db5..ec95563bda9 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -893,6 +893,7 @@ def dataset_merge_method( compat: str, join: str, fill_value: Any, + combine_attrs: str, ) -> _MergeResult: """Guts of the Dataset.merge method.""" # we are locked into supporting overwrite_vars for the Dataset.merge @@ -922,7 +923,12 @@ def dataset_merge_method( priority_arg = 2 return merge_core( - objs, compat, join, priority_arg=priority_arg, fill_value=fill_value + objs, + compat, + join, + priority_arg=priority_arg, + fill_value=fill_value, + combine_attrs=combine_attrs, ) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 27e2b10dcbc..5b84eccca14 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -418,3 +418,34 @@ def test_merge_dataarray(self): da = xr.DataArray(data=1, name="b") assert_identical(ds.merge(da), xr.merge([ds, da])) + + @pytest.mark.parametrize( + ["combine_attrs", "attrs1", "attrs2", "expected_attrs", "expect_error"], + # don't need to test thoroughly + ( + ("drop", {"a": 0, "b": 1, "c": 2}, {"a": 1, "b": 2, "c": 3}, {}, False), + ( + "drop_conflicts", + {"a": 0, "b": 1, "c": 2}, + {"b": 2, "c": 2, "d": 3}, + {"a": 0, "c": 2, "d": 3}, + False, + ), + ("override", {"a": 0, "b": 1}, {"a": 1, "b": 2}, {"a": 0, "b": 1}, False), + ("no_conflicts", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True), + ("identical", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True), + ), + ) + def test_merge_combine_attrs( + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_error + ): + ds1 = xr.Dataset(attrs=attrs1) + ds2 = xr.Dataset(attrs=attrs2) + + if expect_error: + with pytest.raises(xr.MergeError): + ds1.merge(ds2, combine_attrs=combine_attrs) + else: + actual = ds1.merge(ds2, combine_attrs=combine_attrs) + expected = xr.Dataset(attrs=expected_attrs) + assert_identical(actual, expected)