diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 6de2bc8dc64..fc32d35837b 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -11,7 +11,7 @@ dependencies: - dask - distributed - h5netcdf - - h5py=2 + - h5py - hdf5 - hypothesis - iris diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 1bbe349ab21..36147c64c03 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -13,7 +13,7 @@ dependencies: - dask - distributed - h5netcdf - - h5py=2 + - h5py - hdf5 - hypothesis - iris diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 07dc6344a25..3f82990f3b5 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -14,7 +14,7 @@ dependencies: - cftime - coveralls - h5netcdf - - h5py=2 + - h5py - hdf5 - hypothesis - lxml # Optional dep of pydap diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ef4abb15129..1bca3aec68e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -118,6 +118,8 @@ Bug fixes By `Leif Denby <https://github.com/leifdenby>`_. - Fix time encoding bug associated with using cftime versions greater than 1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark <https://github.com/spencerkclark>`_. +- Fix decoding of vlen strings using h5py versions 3.0.0 or later with h5netcdf backend (:issue:`4570`, :pull:`4893`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index aa892c4f89c..5766b34d9bd 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -131,6 +131,7 @@ def open( autoclose=False, invalid_netcdf=None, phony_dims=None, + decode_vlen_strings=True, ): if isinstance(filename, bytes): @@ -157,6 +158,10 @@ def open( "h5netcdf backend keyword argument 'phony_dims' needs " "h5netcdf >= 0.8.0." 
) + if LooseVersion(h5netcdf.__version__) >= LooseVersion( + "0.10.0" + ) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"): + kwargs["decode_vlen_strings"] = decode_vlen_strings if lock is None: if mode == "r": @@ -358,6 +363,7 @@ def open_dataset( lock=None, invalid_netcdf=None, phony_dims=None, + decode_vlen_strings=True, ): store = H5NetCDFStore.open( @@ -367,6 +373,7 @@ def open_dataset( lock=lock, invalid_netcdf=invalid_netcdf, phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 75e0edb4fb2..aefb91478cb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2579,13 +2579,19 @@ def test_open_dataset_group(self): v = group.createVariable("x", "int") v[...] = 42 - h5 = h5netcdf.File(tmp_file, mode="r") + kwargs = {} + if LooseVersion(h5netcdf.__version__) >= LooseVersion( + "0.10.0" + ) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"): + kwargs = dict(decode_vlen_strings=True) + + h5 = h5netcdf.File(tmp_file, mode="r", **kwargs) store = backends.H5NetCDFStore(h5["g"]) with open_dataset(store) as ds: expected = Dataset({"x": ((), 42)}) assert_identical(expected, ds) - h5 = h5netcdf.File(tmp_file, mode="r") + h5 = h5netcdf.File(tmp_file, mode="r", **kwargs) store = backends.H5NetCDFStore(h5, group="g") with open_dataset(store) as ds: expected = Dataset({"x": ((), 42)}) @@ -2600,7 +2606,13 @@ def test_deepcopy(self): v = nc.createVariable("y", np.int32, ("x",)) v[:] = np.arange(10) - h5 = h5netcdf.File(tmp_file, mode="r") + kwargs = {} + if LooseVersion(h5netcdf.__version__) >= LooseVersion( + "0.10.0" + ) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"): + kwargs = dict(decode_vlen_strings=True) + + h5 = h5netcdf.File(tmp_file, mode="r", **kwargs) store = backends.H5NetCDFStore(h5) with open_dataset(store) as ds: copied = 
ds.copy(deep=True)