
fix to_netcdf append bug (GH1215) #1609

Merged: 19 commits, Oct 25, 2017
8 changes: 6 additions & 2 deletions doc/io.rst
@@ -176,6 +176,10 @@ for dealing with datasets too big to fit into memory. Instead, xarray integrates
with dask.array (see :ref:`dask`), which provides a fully featured engine for
streaming computation.

It is possible to append or overwrite netCDF variables using the ``mode='a'``
argument. When using this option, all variables in the dataset will be written
to the original netCDF file, regardless of whether they already exist in the
original file.
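As a minimal sketch of the documented behaviour (the file path and variable names here are illustrative, not taken from the PR):

```python
import os
import tempfile

import numpy as np
import xarray as xr

# Illustrative sketch of mode='a': the second to_netcdf call writes every
# variable in `update` into the existing file, overwriting 'temperature'.
path = os.path.join(tempfile.mkdtemp(), "example.nc")

xr.Dataset({"temperature": ("x", np.arange(3.0))}).to_netcdf(path, mode="w")

update = xr.Dataset({"temperature": ("x", np.zeros(3))})
update.to_netcdf(path, mode="a")

with xr.open_dataset(path) as result:
    values = result["temperature"].values.copy()
print(values)
```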

.. _io.encoding:

Reading encoded data
@@ -390,7 +394,7 @@ over the network until we look at particular values:

Some servers require authentication before we can access the data. For this
purpose we can explicitly create a :py:class:`~xarray.backends.PydapDataStore`
and pass in a `Requests`__ session object. For example for
HTTP Basic authentication::

import xarray as xr
Expand All @@ -403,7 +407,7 @@ HTTP Basic authentication::
session=session)
ds = xr.open_dataset(store)

`Pydap's cas module`__ has functions that generate custom sessions for
servers that use CAS single sign-on. For example, to connect to servers
that require NASA's URS authentication::

4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -281,6 +281,10 @@ Bug fixes
the first argument was a numpy variable (:issue:`1588`).
By `Guido Imperiale <https://github.com/crusaderky>`_.

- Fix bug in :py:meth:`~xarray.Dataset.to_netcdf` when writing in append mode
(:issue:`1215`).
By `Joe Hamman <https://github.com/jhamman>`_.

- Fix ``netCDF4`` backend to properly roundtrip the ``shuffle`` encoding option
(:issue:`1606`).
By `Joe Hamman <https://github.com/jhamman>`_.
9 changes: 7 additions & 2 deletions xarray/backends/common.py
@@ -223,8 +223,13 @@ def set_variables(self, variables, check_encoding_set,
for vn, v in iteritems(variables):
name = _encode_variable_name(vn)
check = vn in check_encoding_set
target, source = self.prepare_variable(
name, v, check, unlimited_dims=unlimited_dims)
if (vn not in self.variables or
(getattr(self, '_mode', False) != 'a')):
Review comment (Member): I don't think we need the explicit check for the mode here. With mode='w', the file should already be starting from scratch.

target, source = self.prepare_variable(
name, v, check, unlimited_dims=unlimited_dims)
else:
target, source = self.ds.variables[name], v.data

self.writer.add(source, target)
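The branching added above can be sketched with a toy, stdlib-only stand-in for the data store (the `ToyStore` class and its method names are hypothetical, not the real xarray backend API):

```python
class ToyStore:
    """Toy stand-in illustrating the append-mode branch in set_variables."""

    def __init__(self, mode):
        self._mode = mode
        self.variables = {}  # name -> list acting as the on-disk target

    def prepare_variable(self, name, data):
        # Fresh variable: allocate a new target in the store.
        target = [None] * len(data)
        self.variables[name] = target
        return target, data

    def set_variables(self, variables):
        for name, data in variables.items():
            if name not in self.variables or self._mode != 'a':
                # New variable, or overwrite mode: create the target.
                target, source = self.prepare_variable(name, data)
            else:
                # Append mode and the variable exists: reuse the target
                # instead of re-creating it (the bug fixed by this PR).
                target, source = self.variables[name], data
            target[:] = source  # stand-in for writer.add(source, target)


store = ToyStore(mode='a')
store.set_variables({'var2': [1, 2, 3]})
store.set_variables({'var2': [-999, -999, -999]})  # overwrites in place
print(store.variables['var2'])  # prints [-999, -999, -999]
```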

def set_necessary_dimensions(self, variable, unlimited_dims=None):
3 changes: 2 additions & 1 deletion xarray/core/dataset.py
@@ -963,7 +963,8 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None,
default format becomes NETCDF3_64BIT).
mode : {'w', 'a'}, optional
Write ('w') or append ('a') mode. If mode='w', any existing file at
this location will be overwritten.
this location will be overwritten. If mode='a', exisitng variables
Review comment (Member): exisitng -> existing

will be overwritten.
format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'}, optional
File format for the resulting netCDF file:

93 changes: 93 additions & 0 deletions xarray/tests/test_backends.py
@@ -823,6 +823,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={},
autoclose=self.autoclose, **open_kwargs) as ds:
yield ds

@contextlib.contextmanager
def roundtrip_append(self, data, save_kwargs={}, open_kwargs={},
Review comment (Member): Can we put more of these tests in one of the base classes, so we don't separately repeat them for scipy, netcdf4 and h5netcdf?

allow_cleanup_failure=False):
with create_tmp_file(
allow_cleanup_failure=allow_cleanup_failure) as tmp_file:
for i, key in enumerate(data.variables):
mode = 'a' if i > 0 else 'w'
data[[key]].to_netcdf(tmp_file, mode=mode, engine='netcdf4',
**save_kwargs)
with open_dataset(tmp_file,
autoclose=self.autoclose, **open_kwargs) as ds:
yield ds

def test_append_write(self):
# regression for GH1215
data = create_test_data()
with self.roundtrip_append(data) as actual:
self.assertDatasetIdentical(data, actual)

def test_append_overwrite_values(self):
data = create_test_data()
with create_tmp_file(allow_cleanup_failure=False) as tmp_file:
data.to_netcdf(tmp_file, mode='w', engine='netcdf4')
data['var2'][:] = -999
data['var9'] = data['var2'] * 3
data[['var2', 'var9']].to_netcdf(tmp_file, mode='a',
engine='netcdf4')
actual = open_dataset(tmp_file, autoclose=self.autoclose,
Review comment (Member): Rather than allowing a clean-up failure, please close this file if possible (use a context manager). File descriptors are still a limited resource for our test suite.

engine='netcdf4')
assert_identical(data, actual)

def test_variable_order(self):
# doesn't work with scipy or h5py :(
ds = Dataset()
@@ -957,6 +988,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={},
autoclose=self.autoclose, **open_kwargs) as ds:
yield ds

@contextlib.contextmanager
def roundtrip_append(self, data, save_kwargs={}, open_kwargs={},
allow_cleanup_failure=False):
with create_tmp_file(
allow_cleanup_failure=allow_cleanup_failure) as tmp_file:
for i, key in enumerate(data.variables):
mode = 'a' if i > 0 else 'w'
data[[key]].to_netcdf(tmp_file, mode=mode, engine='scipy',
**save_kwargs)
data.close()
with open_dataset(tmp_file,
engine='scipy', **open_kwargs) as ds:
yield ds

def test_append_write(self):
# regression for GH1215
data = create_test_data()
with self.roundtrip_append(data) as actual:
assert_allclose(data, actual)

def test_append_overwrite_values(self):
data = create_test_data()
with create_tmp_file(allow_cleanup_failure=False) as tmp_file:
data.to_netcdf(tmp_file, mode='w', engine='scipy')
data['var2'][:] = -999
data['var9'] = data['var2'] * 3
data[['var2', 'var9']].to_netcdf(tmp_file, mode='a',
engine='scipy')
actual = open_dataset(tmp_file, engine='scipy')
Review comment (Member): This is triggering a test failure on Windows, where you apparently can't open the same file twice.

assert_allclose(data, actual)
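A stdlib-only illustration of the reviewer's point (names are illustrative): reading through a context manager closes the handle deterministically, so the file can be removed afterwards even on Windows, where deleting a still-open file fails.

```python
import os
import tempfile

# Write a small file, then read it back through a context manager so the
# handle is released before we try to delete the file.
fd, path = tempfile.mkstemp()
os.close(fd)
with open(path, "w") as f:
    f.write("data")

with open(path) as f:  # deterministically closed on exit
    contents = f.read()

os.remove(path)  # safe: no handle is still open
print(contents)  # prints data
```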

def test_array_attrs(self):
ds = Dataset(attrs={'foo': [[1, 2], [3, 4]]})
with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
@@ -1137,6 +1199,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={},
autoclose=self.autoclose, **open_kwargs) as ds:
yield ds

@contextlib.contextmanager
def roundtrip_append(self, data, save_kwargs={}, open_kwargs={},
allow_cleanup_failure=False):
with create_tmp_file(
allow_cleanup_failure=allow_cleanup_failure) as tmp_file:
for i, key in enumerate(data.variables):
mode = 'a' if i > 0 else 'w'
data[[key]].to_netcdf(tmp_file, mode=mode, engine='h5netcdf',
**save_kwargs)
with open_dataset(tmp_file,
autoclose=self.autoclose, **open_kwargs) as ds:
yield ds

def test_append_write(self):
# regression for GH1215
data = create_test_data()
with self.roundtrip_append(data) as actual:
self.assertDatasetIdentical(data, actual)

def test_append_overwrite_values(self):
data = create_test_data()
with create_tmp_file(allow_cleanup_failure=False) as tmp_file:
data.to_netcdf(tmp_file, mode='w', engine='h5netcdf')
data['var2'][:] = -999
data['var9'] = data['var2'] * 3
data[['var2', 'var9']].to_netcdf(tmp_file, mode='a',
engine='h5netcdf')
actual = open_dataset(tmp_file, autoclose=self.autoclose,
engine='h5netcdf')
assert_identical(data, actual)

def test_orthogonal_indexing(self):
# doesn't work for h5py (without using dask as an intermediate layer)
pass