Skip to content

Commit

Permalink
add a combine_attrs parameter to Dataset.merge (#4895)
Browse files Browse the repository at this point in the history
* add a combine_attrs kwarg to Dataset.merge

* document the new drop_conflicts value

* test that combine_attrs is passed through

* fix the documented default of combine_attrs

* update whats-new.rst

* minor fix [skip-ci]

* minor fix [skip-ci]

* remove an empty line [skip-ci]

* fix bad merge [skip-ci]

* fix bad merge [skip-ci]

* remove the blank line after rst lists [skip-ci]
  • Loading branch information
keewis authored Mar 6, 2021
1 parent 37522e9 commit 229829f
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ v0.17.1 (unreleased)

New Features
~~~~~~~~~~~~
- Allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`).
By `Justus Magin <https://github.com/keewis>`_.
- Support for `dask.graph_manipulation
<https://docs.dask.org/en/latest/graph_manipulation.html>`_ (requires dask >=2021.3).
By `Guido Imperiale <https://github.com/crusaderky>`_.
Expand Down
15 changes: 14 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3917,6 +3917,7 @@ def merge(
compat: str = "no_conflicts",
join: str = "outer",
fill_value: Any = dtypes.NA,
combine_attrs: str = "override",
) -> "Dataset":
"""Merge the arrays of two datasets into a single dataset.
Expand Down Expand Up @@ -3945,7 +3946,6 @@ def merge(
- 'no_conflicts': only values which are not null in both datasets
must be equal. The returned dataset then contains the combination
of all non-null values.
join : {"outer", "inner", "left", "right", "exact"}, optional
Method for joining ``self`` and ``other`` along shared dimensions:
Expand All @@ -3957,6 +3957,18 @@ def merge(
fill_value : scalar or dict-like, optional
Value to use for newly missing values. If a dict-like, maps
variable names (including coordinates) to fill values.
combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
"override"}, default: "override"
String indicating how to combine attrs of the objects being merged:
- "drop": empty attrs on returned Dataset.
- "identical": all attrs must be the same on every object.
- "no_conflicts": attrs from all objects are combined, any that have
the same name must also have the same value.
- "drop_conflicts": attrs from all objects are combined, any that have
the same name but different values are dropped.
- "override": skip comparing and copy attrs from the first dataset to
the result.
Returns
-------
Expand All @@ -3976,6 +3988,7 @@ def merge(
compat=compat,
join=join,
fill_value=fill_value,
combine_attrs=combine_attrs,
)
return self._replace(**merge_result._asdict())

Expand Down
8 changes: 7 additions & 1 deletion xarray/core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,7 @@ def dataset_merge_method(
compat: str,
join: str,
fill_value: Any,
combine_attrs: str,
) -> _MergeResult:
"""Guts of the Dataset.merge method."""
# we are locked into supporting overwrite_vars for the Dataset.merge
Expand Down Expand Up @@ -922,7 +923,12 @@ def dataset_merge_method(
priority_arg = 2

return merge_core(
objs, compat, join, priority_arg=priority_arg, fill_value=fill_value
objs,
compat,
join,
priority_arg=priority_arg,
fill_value=fill_value,
combine_attrs=combine_attrs,
)


Expand Down
31 changes: 31 additions & 0 deletions xarray/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,3 +418,34 @@ def test_merge_dataarray(self):
da = xr.DataArray(data=1, name="b")

assert_identical(ds.merge(da), xr.merge([ds, da]))

@pytest.mark.parametrize(
    ["combine_attrs", "attrs1", "attrs2", "expected_attrs", "expect_error"],
    # A few representative cases per strategy; exhaustive coverage is not
    # the goal of this test.
    (
        ("drop", {"a": 0, "b": 1, "c": 2}, {"a": 1, "b": 2, "c": 3}, {}, False),
        (
            "drop_conflicts",
            {"a": 0, "b": 1, "c": 2},
            {"b": 2, "c": 2, "d": 3},
            {"a": 0, "c": 2, "d": 3},
            False,
        ),
        ("override", {"a": 0, "b": 1}, {"a": 1, "b": 2}, {"a": 0, "b": 1}, False),
        ("no_conflicts", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True),
        ("identical", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True),
    ),
)
def test_merge_combine_attrs(
    self, combine_attrs, attrs1, attrs2, expected_attrs, expect_error
):
    """Check how ``Dataset.merge`` treats attrs for a given ``combine_attrs``.

    Two variable-less datasets carrying ``attrs1`` and ``attrs2`` are merged.
    When ``expect_error`` is true the merge must raise ``xr.MergeError``;
    otherwise the result must be identical to an empty dataset whose attrs
    are ``expected_attrs``.
    """
    left = xr.Dataset(attrs=attrs1)
    right = xr.Dataset(attrs=attrs2)

    # Failure cases: only the exception matters, there is no result to check.
    if expect_error:
        with pytest.raises(xr.MergeError):
            left.merge(right, combine_attrs=combine_attrs)
        return

    merged = left.merge(right, combine_attrs=combine_attrs)
    wanted = xr.Dataset(attrs=expected_attrs)
    assert_identical(merged, wanted)

0 comments on commit 229829f

Please sign in to comment.