From 3c51994323009dafc81c104dac3c0d1582987b06 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 1 Oct 2019 15:12:06 -0600 Subject: [PATCH 1/4] Make concat more forgiving with variables that are being merged. --- doc/whats-new.rst | 2 ++ xarray/core/concat.py | 14 ++++---------- xarray/tests/test_combine.py | 7 +++---- xarray/tests/test_concat.py | 25 ++++++++++++++++++++----- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f9c952f6752..ca58c4d5f57 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,8 @@ Bug fixes - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays. (:issue:`3334`). By `Tom Nicholas <http://github.com/TomNicholas>`_. +- Make :py:func:`~xarray.concat` more robust when merging variables present in some datasets but + not others (:issue:`508`). By `Deepak Cherian <https://github.com/dcherian>`_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/concat.py b/xarray/core/concat.py index e68c247d880..4265a9cf1e7 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -315,16 +315,10 @@ def _dataset_concat( if variables_to_merge: to_merge = {var: [] for var in variables_to_merge} - for ds in datasets: - absent_merge_vars = variables_to_merge - set(ds.variables) - if absent_merge_vars: - raise ValueError( - "variables %r are present in some datasets but not others. 
" - % absent_merge_vars - ) - - for var in variables_to_merge: - to_merge[var].append(ds.variables[var]) + for var in variables_to_merge: + for ds in datasets: + if var in ds: + to_merge[var].append(ds.variables[var]) for var in variables_to_merge: result_vars[var] = unique_variable( diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 6037669ac07..e7769110c24 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -784,11 +784,10 @@ def test_auto_combine_previously_failed(self): assert_identical(expected, actual) def test_auto_combine_still_fails(self): - # concat can't handle new variables (yet): - # https://github.com/pydata/xarray/issues/508 datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})] - with pytest.raises(ValueError): - auto_combine(datasets, "y") + actual = auto_combine(datasets, "y") + expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1}) + assert_identical(expected, actual) def test_auto_combine_no_concat(self): objs = [Dataset({"x": 0}), Dataset({"y": 1})] diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 00428f70966..0ee54a8156a 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -65,6 +65,26 @@ def test_concat_simple(self, data, dim, coords): datasets = [g for _, g in data.groupby(dim, squeeze=False)] assert_identical(data, concat(datasets, dim, coords=coords)) + @pytest.mark.parametrize( + "compat", + ["equals", "broadcast_equals", "no_conflicts", "override", "identical"], + ) + def test_concat_merge_variables_present_in_some_datasets(self, data, compat): + # coordinates present in some datasets but not others + ds1 = Dataset(data_vars={"a": ("y", [0.1])}, coords={"x": 0.1}) + ds2 = Dataset(data_vars={"a": ("y", [0.2])}, coords={"z": 0.2}) + actual = concat([ds1, ds2], dim="y", coords="minimal", compat=compat) + expected = Dataset({"a": ("y", [0.1, 0.2])}, coords={"x": 0.1, "z": 0.2}) + 
assert_identical(expected, actual) + + # data variables present in some datasets but not others + split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))] + data0, data1 = deepcopy(split_data) + data1["foo"] = ("bar", np.random.randn(10)) + actual = concat([data0, data1], "dim1") + expected = data.copy().assign(foo=data1.foo) + assert_identical(expected, actual) + def test_concat_2(self, data): dim = "dim2" datasets = [g for _, g in data.groupby(dim, squeeze=True)] @@ -183,11 +203,6 @@ def test_concat_errors(self): concat([data0, data1], "dim1", compat="identical") assert_identical(data, concat([data0, data1], "dim1", compat="equals")) - with raises_regex(ValueError, "present in some datasets"): - data0, data1 = deepcopy(split_data) - data1["foo"] = ("bar", np.random.randn(10)) - concat([data0, data1], "dim1") - with raises_regex(ValueError, "compat.* invalid"): concat(split_data, "dim1", compat="foobar") From 575e882370b310c03791d5e051abb67e55b99312 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 1 Oct 2019 15:45:42 -0600 Subject: [PATCH 2/4] rename test. --- xarray/tests/test_combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index e7769110c24..5e25085920d 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -783,7 +783,7 @@ def test_auto_combine_previously_failed(self): actual = auto_combine(datasets, concat_dim="t") assert_identical(expected, actual) - def test_auto_combine_still_fails(self): + def test_auto_combine_with_new_variables(self): datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})] actual = auto_combine(datasets, "y") expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1}) From 649751b591f69e3f9583715b4315f070ffd2ad1b Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 1 Oct 2019 16:05:00 -0600 Subject: [PATCH 3/4] simplify test. 
--- xarray/tests/test_concat.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 0ee54a8156a..ee932d5f7f5 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -65,15 +65,11 @@ def test_concat_simple(self, data, dim, coords): datasets = [g for _, g in data.groupby(dim, squeeze=False)] assert_identical(data, concat(datasets, dim, coords=coords)) - @pytest.mark.parametrize( - "compat", - ["equals", "broadcast_equals", "no_conflicts", "override", "identical"], - ) - def test_concat_merge_variables_present_in_some_datasets(self, data, compat): + def test_concat_merge_variables_present_in_some_datasets(self, data): # coordinates present in some datasets but not others ds1 = Dataset(data_vars={"a": ("y", [0.1])}, coords={"x": 0.1}) ds2 = Dataset(data_vars={"a": ("y", [0.2])}, coords={"z": 0.2}) - actual = concat([ds1, ds2], dim="y", coords="minimal", compat=compat) + actual = concat([ds1, ds2], dim="y", coords="minimal") expected = Dataset({"a": ("y", [0.1, 0.2])}, coords={"x": 0.1, "z": 0.2}) assert_identical(expected, actual) From 42fd5fc444df51ed02b70dccc4e24ea74f743084 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 1 Oct 2019 16:05:57 -0600 Subject: [PATCH 4/4] make diff smaller. --- xarray/core/concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 4265a9cf1e7..be2eb5620c5 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -315,8 +315,8 @@ def _dataset_concat( if variables_to_merge: to_merge = {var: [] for var in variables_to_merge} - for var in variables_to_merge: - for ds in datasets: + for ds in datasets: + for var in variables_to_merge: if var in ds: to_merge[var].append(ds.variables[var])