Skip to content
forked from pydata/xarray

Commit

Permalink
FIX: h5py>=3 string decoding (pydata#4893)
Browse files Browse the repository at this point in the history
* FIX: set `decode_strings=True` for h5netcdf backend, convert object string to byte string if necessary, unpin h5py

* Update strings.py

* Update h5netcdf_.py

* fix style

* FIX: change decode_strings -> decode_vlen_strings, add whats-new.rst entry

* FIX: change missed decode_strings -> decode_vlen_strings

* FIX: set `decode_vlen_strings=True` in `open` classmethod, call remaining tests with `decode_vlen_strings=True`

* FIX: cover tests for h5py=2
  • Loading branch information
kmuehlbauer authored Feb 17, 2021
1 parent cdf7761 commit a8ed7ed
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 6 deletions.
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies:
- dask
- distributed
- h5netcdf
- h5py=2
- h5py
- hdf5
- hypothesis
- iris
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies:
- dask
- distributed
- h5netcdf
- h5py=2
- h5py
- hdf5
- hypothesis
- iris
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/py38-all-but-dask.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies:
- cftime
- coveralls
- h5netcdf
- h5py=2
- h5py
- hdf5
- hypothesis
- lxml # Optional dep of pydap
Expand Down
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ Bug fixes
By `Leif Denby <https://github.com/leifdenby>`_.
- Fix time encoding bug associated with using cftime versions greater than
1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark <https://github.com/spencerkclark>`_.
- Fix decoding of vlen strings using h5py versions greater than or equal to 3.0.0 with h5netcdf backend (:issue:`4570`, :pull:`4893`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
7 changes: 7 additions & 0 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def open(
autoclose=False,
invalid_netcdf=None,
phony_dims=None,
decode_vlen_strings=True,
):

if isinstance(filename, bytes):
Expand All @@ -157,6 +158,10 @@ def open(
"h5netcdf backend keyword argument 'phony_dims' needs "
"h5netcdf >= 0.8.0."
)
if LooseVersion(h5netcdf.__version__) >= LooseVersion(
"0.10.0"
) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"):
kwargs["decode_vlen_strings"] = decode_vlen_strings

if lock is None:
if mode == "r":
Expand Down Expand Up @@ -358,6 +363,7 @@ def open_dataset(
lock=None,
invalid_netcdf=None,
phony_dims=None,
decode_vlen_strings=True,
):

store = H5NetCDFStore.open(
Expand All @@ -367,6 +373,7 @@ def open_dataset(
lock=lock,
invalid_netcdf=invalid_netcdf,
phony_dims=phony_dims,
decode_vlen_strings=decode_vlen_strings,
)

store_entrypoint = StoreBackendEntrypoint()
Expand Down
18 changes: 15 additions & 3 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2579,13 +2579,19 @@ def test_open_dataset_group(self):
v = group.createVariable("x", "int")
v[...] = 42

h5 = h5netcdf.File(tmp_file, mode="r")
kwargs = {}
if LooseVersion(h5netcdf.__version__) >= LooseVersion(
"0.10.0"
) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"):
kwargs = dict(decode_vlen_strings=True)

h5 = h5netcdf.File(tmp_file, mode="r", **kwargs)
store = backends.H5NetCDFStore(h5["g"])
with open_dataset(store) as ds:
expected = Dataset({"x": ((), 42)})
assert_identical(expected, ds)

h5 = h5netcdf.File(tmp_file, mode="r")
h5 = h5netcdf.File(tmp_file, mode="r", **kwargs)
store = backends.H5NetCDFStore(h5, group="g")
with open_dataset(store) as ds:
expected = Dataset({"x": ((), 42)})
Expand All @@ -2600,7 +2606,13 @@ def test_deepcopy(self):
v = nc.createVariable("y", np.int32, ("x",))
v[:] = np.arange(10)

h5 = h5netcdf.File(tmp_file, mode="r")
kwargs = {}
if LooseVersion(h5netcdf.__version__) >= LooseVersion(
"0.10.0"
) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"):
kwargs = dict(decode_vlen_strings=True)

h5 = h5netcdf.File(tmp_file, mode="r", **kwargs)
store = backends.H5NetCDFStore(h5)
with open_dataset(store) as ds:
copied = ds.copy(deep=True)
Expand Down

0 comments on commit a8ed7ed

Please sign in to comment.