-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix to_netcdf append bug (GH1215) #1609
Changes from 9 commits
231ad51
0a519a9
7f5e96f
d13d48c
3625035
779a4b1
1782108
0418264
d3e2b97
04671e6
faa5098
6b4a30d
d801940
6347cad
6ea5042
5a68bc5
92c49a8
0b1efd2
09101d6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -963,7 +963,8 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None, | |
default format becomes NETCDF3_64BIT). | ||
mode : {'w', 'a'}, optional | ||
Write ('w') or append ('a') mode. If mode='w', any existing file at | ||
this location will be overwritten. | ||
this location will be overwritten. If mode='a', exisitng variables | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. exisitng -> existing |
||
will be overwritten. | ||
format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'}, optional | ||
File format for the resulting netCDF file: | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -823,6 +823,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, | |
autoclose=self.autoclose, **open_kwargs) as ds: | ||
yield ds | ||
|
||
@contextlib.contextmanager | ||
def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we put mode of these tests in one of the base classes, so we don't separately repeat them for scipy, netcdf4 and h5netcdf? |
||
allow_cleanup_failure=False): | ||
with create_tmp_file( | ||
allow_cleanup_failure=allow_cleanup_failure) as tmp_file: | ||
for i, key in enumerate(data.variables): | ||
mode = 'a' if i > 0 else 'w' | ||
data[[key]].to_netcdf(tmp_file, mode=mode, engine='netcdf4', | ||
**save_kwargs) | ||
with open_dataset(tmp_file, | ||
autoclose=self.autoclose, **open_kwargs) as ds: | ||
yield ds | ||
|
||
def test_append_write(self): | ||
# regression for GH1215 | ||
data = create_test_data() | ||
with self.roundtrip_append(data) as actual: | ||
self.assertDatasetIdentical(data, actual) | ||
|
||
def test_append_overwrite_values(self): | ||
data = create_test_data() | ||
with create_tmp_file(allow_cleanup_failure=False) as tmp_file: | ||
data.to_netcdf(tmp_file, mode='w', engine='netcdf4') | ||
data['var2'][:] = -999 | ||
data['var9'] = data['var2'] * 3 | ||
data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', | ||
engine='netcdf4') | ||
actual = open_dataset(tmp_file, autoclose=self.autoclose, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than allowing a clean-up failure, please close this file if possible (use a context manager). File descriptors are still a limited resource for our test suite. |
||
engine='netcdf4') | ||
assert_identical(data, actual) | ||
|
||
def test_variable_order(self): | ||
# doesn't work with scipy or h5py :( | ||
ds = Dataset() | ||
|
@@ -957,6 +988,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, | |
autoclose=self.autoclose, **open_kwargs) as ds: | ||
yield ds | ||
|
||
@contextlib.contextmanager | ||
def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, | ||
allow_cleanup_failure=False): | ||
with create_tmp_file( | ||
allow_cleanup_failure=allow_cleanup_failure) as tmp_file: | ||
for i, key in enumerate(data.variables): | ||
mode = 'a' if i > 0 else 'w' | ||
data[[key]].to_netcdf(tmp_file, mode=mode, engine='scipy', | ||
**save_kwargs) | ||
data.close() | ||
with open_dataset(tmp_file, | ||
engine='scipy', **open_kwargs) as ds: | ||
yield ds | ||
|
||
def test_append_write(self): | ||
# regression for GH1215 | ||
data = create_test_data() | ||
with self.roundtrip_append(data) as actual: | ||
assert_allclose(data, actual) | ||
|
||
def test_append_overwrite_values(self): | ||
data = create_test_data() | ||
with create_tmp_file(allow_cleanup_failure=False) as tmp_file: | ||
data.to_netcdf(tmp_file, mode='w', engine='scipy') | ||
data['var2'][:] = -999 | ||
data['var9'] = data['var2'] * 3 | ||
data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', | ||
engine='scipy') | ||
actual = open_dataset(tmp_file, engine='scipy') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is triggering a test failure on Windows, where you apparently can't open the same file twice. |
||
assert_allclose(data, actual) | ||
|
||
def test_array_attrs(self): | ||
ds = Dataset(attrs={'foo': [[1, 2], [3, 4]]}) | ||
with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'): | ||
|
@@ -1137,6 +1199,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, | |
autoclose=self.autoclose, **open_kwargs) as ds: | ||
yield ds | ||
|
||
@contextlib.contextmanager | ||
def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, | ||
allow_cleanup_failure=False): | ||
with create_tmp_file( | ||
allow_cleanup_failure=allow_cleanup_failure) as tmp_file: | ||
for i, key in enumerate(data.variables): | ||
mode = 'a' if i > 0 else 'w' | ||
data[[key]].to_netcdf(tmp_file, mode=mode, engine='h5netcdf', | ||
**save_kwargs) | ||
with open_dataset(tmp_file, | ||
autoclose=self.autoclose, **open_kwargs) as ds: | ||
yield ds | ||
|
||
def test_append_write(self): | ||
# regression for GH1215 | ||
data = create_test_data() | ||
with self.roundtrip_append(data) as actual: | ||
self.assertDatasetIdentical(data, actual) | ||
|
||
def test_append_overwrite_values(self): | ||
data = create_test_data() | ||
with create_tmp_file(allow_cleanup_failure=False) as tmp_file: | ||
data.to_netcdf(tmp_file, mode='w', engine='h5netcdf') | ||
data['var2'][:] = -999 | ||
data['var9'] = data['var2'] * 3 | ||
data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', | ||
engine='h5netcdf') | ||
actual = open_dataset(tmp_file, autoclose=self.autoclose, | ||
engine='h5netcdf') | ||
assert_identical(data, actual) | ||
|
||
def test_orthogonal_indexing(self): | ||
# doesn't work for h5py (without using dask as an intermediate layer) | ||
pass | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we need the explicit check here for the mode here. With
mode='w'
, the file should already be starting from scratch.