
Add option to choose mfdataset attributes source. #3498

Merged 15 commits on Jan 11, 2020
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -37,6 +37,9 @@ New Features
- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
By `Deepak Cherian <https://github.com/dcherian>`_
- Add ``attrs_file`` option in :py:func:`~xarray.open_mfdataset` to choose the
  source file for global attributes in a multi-file dataset (:issue:`2382`,
  :pull:`3498`) by `Julien Seguinot <https://github.com/juseg>`_.
- :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`
now allow swapping to dimension names that don't exist yet. (:pull:`3636`)
By `Justus Magin <https://github.com/keewis>`_.
19 changes: 16 additions & 3 deletions xarray/backends/api.py
@@ -718,6 +718,7 @@ def open_mfdataset(
    autoclose=None,
    parallel=False,
    join="outer",
    attrs_file=None,
    **kwargs,
):
    """Open multiple files as a single dataset.
@@ -729,8 +730,8 @@
    ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated)
    ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or
    ``combine='nested'`` in future. Requires dask to be installed. See documentation for
    details on dask [1]_. Global attributes from the ``attrs_file`` are used
    for the combined dataset.

    Parameters
    ----------
@@ -827,6 +828,10 @@
        - 'override': if indexes are of same size, rewrite indexes to be
          those of the first object with that dimension. Indexes for the same
          dimension must have the same size in all objects.
    attrs_file : str or pathlib.Path, optional
        Path of the file used to read global attributes from.
        By default global attributes are read from the first file provided,
        with wildcard matches sorted by filename.
    **kwargs : optional
        Additional arguments passed on to :py:func:`xarray.open_dataset`.

@@ -961,7 +966,15 @@
        raise

    combined._file_obj = _MultiFileCloser(file_objs)

    # read global attributes from the attrs_file or from the first dataset
    if attrs_file is not None:
        if isinstance(attrs_file, Path):
            attrs_file = str(attrs_file)
        combined.attrs = datasets[paths.index(attrs_file)].attrs
    else:
        combined.attrs = datasets[0].attrs

    return combined
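The attribute-selection branch above can be sketched in isolation. The following standalone mimic (the helper name `select_attrs` is hypothetical; in the PR the logic lives inline in `open_mfdataset`) shows how `attrs_file` picks which dataset's global attributes end up on the combined result:

```python
from pathlib import Path
from types import SimpleNamespace


def select_attrs(datasets, paths, attrs_file=None):
    """Pick global attributes, mimicking the attrs_file logic in open_mfdataset."""
    if attrs_file is not None:
        # Normalize pathlib.Path to str so it can be matched against paths.
        if isinstance(attrs_file, Path):
            attrs_file = str(attrs_file)
        # paths.index raises ValueError if attrs_file was not one of the inputs.
        return datasets[paths.index(attrs_file)].attrs
    # Default: attributes of the first dataset.
    return datasets[0].attrs


# Two stand-in "datasets" with distinct global attributes.
ds1 = SimpleNamespace(attrs={"source": "a.nc"})
ds2 = SimpleNamespace(attrs={"source": "b.nc"})

print(select_attrs([ds1, ds2], ["a.nc", "b.nc"]))                     # first file by default
print(select_attrs([ds1, ds2], ["a.nc", "b.nc"], attrs_file="b.nc"))  # explicit choice
```

Note that `attrs_file` must match one of the input paths exactly after `Path`-to-`str` normalization; an unknown path surfaces as a `ValueError` from `paths.index`.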


36 changes: 36 additions & 0 deletions xarray/tests/test_backends.py
@@ -2832,6 +2832,42 @@ def test_attrs_mfdataset(self):
        with raises_regex(AttributeError, "no attribute"):
            actual.test2

    def test_open_mfdataset_attrs_file(self):
        original = Dataset({"foo": ("x", np.random.randn(10))})
        with create_tmp_files(2) as (tmp1, tmp2):
            ds1 = original.isel(x=slice(5))
            ds2 = original.isel(x=slice(5, 10))
            ds1.attrs["test1"] = "foo"
            ds2.attrs["test2"] = "bar"
            ds1.to_netcdf(tmp1)
            ds2.to_netcdf(tmp2)
            with open_mfdataset(
                [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2
            ) as actual:
                # attributes are inherited from the master file
                assert actual.attrs["test2"] == ds2.attrs["test2"]
                # attributes from ds1 are not retained, e.g.,
                assert "test1" not in actual.attrs

    def test_open_mfdataset_attrs_file_path(self):
        original = Dataset({"foo": ("x", np.random.randn(10))})
        with create_tmp_files(2) as (tmp1, tmp2):
            tmp1 = Path(tmp1)
            tmp2 = Path(tmp2)
            ds1 = original.isel(x=slice(5))
            ds2 = original.isel(x=slice(5, 10))
            ds1.attrs["test1"] = "foo"
            ds2.attrs["test2"] = "bar"
            ds1.to_netcdf(tmp1)
            ds2.to_netcdf(tmp2)
            with open_mfdataset(
                [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2
            ) as actual:
                # attributes are inherited from the master file
                assert actual.attrs["test2"] == ds2.attrs["test2"]
                # attributes from ds1 are not retained, e.g.,
                assert "test1" not in actual.attrs
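The docstring states that wildcard matches are sorted by filename, so the "first file provided" default is deterministic even when a glob pattern is passed. A minimal sketch of that ordering (plain `glob` plus `sorted`, standing in for the path expansion `open_mfdataset` performs on string patterns):

```python
import glob
import os
import tempfile

# Create a few files whose creation order differs from their sorted order.
tmpdir = tempfile.mkdtemp()
for name in ["c.nc", "a.nc", "b.nc"]:
    open(os.path.join(tmpdir, name), "w").close()

# Wildcard matches are sorted by filename, so the default attrs source
# is the lexicographically first match, not the first file created.
paths = sorted(glob.glob(os.path.join(tmpdir, "*.nc")))
print(os.path.basename(paths[0]))  # → a.nc
```

Passing `attrs_file` explicitly sidesteps this ordering entirely, which is the point of the feature.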

    def test_open_mfdataset_auto_combine(self):
        original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)})
        with create_tmp_file() as tmp1: