Merge remote-tracking branch 'upstream/master' into integrate_dim_arg
* upstream/master:
  Bugfix in list_engine (pydata#4811)
  Add drop_isel (pydata#4819)
  Fix RST.
  Remove the references to `_file_obj` outside low level code paths, change to `_close` (pydata#4809)
dcherian committed Jan 29, 2021
2 parents 4614d76 + 7dbbdca commit 54a5fce
Showing 23 changed files with 327 additions and 87 deletions.
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -126,6 +126,7 @@ Indexing
Dataset.isel
Dataset.sel
Dataset.drop_sel
Dataset.drop_isel
Dataset.head
Dataset.tail
Dataset.thin
@@ -307,6 +308,7 @@ Indexing
DataArray.isel
DataArray.sel
DataArray.drop_sel
DataArray.drop_isel
DataArray.head
DataArray.tail
DataArray.thin
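
The entries above pair the label-based ``drop_sel`` with its new positional counterpart. A minimal usage sketch (the dataset and variable names are illustrative, not taken from this commit):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": (("x", "y"), np.arange(12).reshape(4, 3))})

    # drop_sel removes entries by coordinate label; drop_isel removes them
    # by integer position, which also works when a dimension carries no
    # coordinate -- the gap reported in issue 4658.
    smaller = ds.drop_isel(y=[0, 2])  # drop first and third positions along y
    assert dict(smaller.sizes) == {"x": 4, "y": 1}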
7 changes: 5 additions & 2 deletions doc/whats-new.rst
@@ -39,8 +39,8 @@ Breaking changes
always be set such that ``int64`` values can be used. In the past, no units
finer than "seconds" were chosen, which would sometimes mean that ``float64``
values were required, which would lead to inaccurate I/O round-trips.
- remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull: `4725`).
By `Aureliana Barghini <https://github.com/aurghs>`_
- remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull:`4725`).
By `Aureliana Barghini <https://github.com/aurghs>`_.

Deprecations
~~~~~~~~~~~~
@@ -87,6 +87,7 @@ Bug fixes
- Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and
:py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`).
By `Julien Seguinot <https://github.com/juseg>`_.
- Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo <https://github.com/mesejo>`_.

Documentation
~~~~~~~~~~~~~
@@ -115,6 +116,8 @@ Internal Changes
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Speed up attribute style access (e.g. ``ds.somevar`` instead of ``ds["somevar"]``) and tab completion
in ipython (:issue:`4741`, :pull:`4742`). By `Richard Kleijn <https://github.com/rhkleijn>`_.
- Added the ``set_close`` method to ``Dataset`` and ``DataArray`` for backends to specify how to voluntarily release
  all resources (:pull:`4809`). By `Alessandro Amici <https://github.com/alexamici>`_.

.. _whats-new.0.16.2:

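
The ``set_close`` entry above replaces direct assignment to the private ``_file_obj`` attribute. A minimal sketch of the new contract, using ``io.BytesIO`` as a stand-in for a real backend file handle:

    import io
    import xarray as xr

    resource = io.BytesIO()  # stand-in for a file handle opened by a backend
    ds = xr.Dataset()        # stand-in for the dataset decoded from it

    # The backend registers how to release the resource; callers keep using
    # ds.close() (or a context manager) exactly as before.
    ds.set_close(resource.close)
    ds.close()
    assert resource.closed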
25 changes: 9 additions & 16 deletions xarray/backends/api.py
@@ -522,7 +522,7 @@ def maybe_decode_store(store, chunks):

else:
ds2 = ds
ds2._file_obj = ds._file_obj
ds2.set_close(ds._close)
return ds2

filename_or_obj = _normalize_path(filename_or_obj)
@@ -701,7 +701,7 @@ def open_dataarray(
else:
(data_array,) = dataset.data_vars.values()

data_array._file_obj = dataset._file_obj
data_array.set_close(dataset._close)

# Reset names if they were changed during saving
# to ensure that we can 'roundtrip' perfectly
@@ -715,17 +715,6 @@
return data_array


class _MultiFileCloser:
__slots__ = ("file_objs",)

def __init__(self, file_objs):
self.file_objs = file_objs

def close(self):
for f in self.file_objs:
f.close()


def open_mfdataset(
paths,
chunks=None,
@@ -918,14 +907,14 @@ def open_mfdataset(
getattr_ = getattr

datasets = [open_(p, **open_kwargs) for p in paths]
file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
closers = [getattr_(ds, "_close") for ds in datasets]
if preprocess is not None:
datasets = [preprocess(ds) for ds in datasets]

if parallel:
# calling compute here will return the datasets/file_objs lists,
# the underlying datasets will still be stored as dask arrays
datasets, file_objs = dask.compute(datasets, file_objs)
datasets, closers = dask.compute(datasets, closers)

# Combine all datasets, closing them in case of a ValueError
try:
@@ -963,7 +952,11 @@
ds.close()
raise

combined._file_obj = _MultiFileCloser(file_objs)
def multi_file_closer():
for closer in closers:
closer()

combined.set_close(multi_file_closer)

# read global attributes from the attrs_file or from the first dataset
if attrs_file is not None:
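
With the dedicated ``_MultiFileCloser`` class removed above, a plain closure over the per-file closers now does the same job through ``set_close``. A sketch of the pattern with stand-in resources:

    import io
    import xarray as xr

    resources = [io.BytesIO(), io.BytesIO()]  # stand-ins for per-file handles
    closers = [r.close for r in resources]

    combined = xr.Dataset()  # stand-in for the combined dataset

    def multi_file_closer():
        # Same shape as the closure in the diff: call every per-file closer.
        for closer in closers:
            closer()

    combined.set_close(multi_file_closer)
    combined.close()  # one close() releases every underlying file
    assert all(r.closed for r in resources)

Compared with the removed class, the closure needs no ``__slots__`` bookkeeping and keeps the cleanup logic next to its only call site.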
2 changes: 1 addition & 1 deletion xarray/backends/apiv2.py
@@ -90,7 +90,7 @@ def _dataset_from_backend_dataset(
**extra_tokens,
)

ds._file_obj = backend_ds._file_obj
ds.set_close(backend_ds._close)

# Ensure source filename always stored in dataset object (GH issue #2550)
if "source" not in ds.encoding:
20 changes: 18 additions & 2 deletions xarray/backends/cfgrib_.py
@@ -5,10 +5,23 @@
from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.variable import Variable
from .common import AbstractDataStore, BackendArray, BackendEntrypoint
from .common import (
BACKEND_ENTRYPOINTS,
AbstractDataStore,
BackendArray,
BackendEntrypoint,
)
from .locks import SerializableLock, ensure_lock
from .store import open_backend_dataset_store

try:
import cfgrib

has_cfgrib = True
except ModuleNotFoundError:
has_cfgrib = False


# FIXME: Add a dedicated lock, even if ecCodes is supposed to be thread-safe
# in most circumstances. See:
# https://confluence.ecmwf.int/display/ECC/Frequently+Asked+Questions
@@ -38,7 +51,6 @@ class CfGribDataStore(AbstractDataStore):
"""

def __init__(self, filename, lock=None, **backend_kwargs):
import cfgrib

if lock is None:
lock = ECCODES_LOCK
@@ -129,3 +141,7 @@ def open_backend_dataset_cfgrib(
cfgrib_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_cfgrib, guess_can_open=guess_can_open_cfgrib
)


if has_cfgrib:
BACKEND_ENTRYPOINTS["cfgrib"] = cfgrib_backend
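
Each backend module now follows the same shape: attempt the optional import at module load, remember the outcome, and register the entrypoint only on success, so a missing library disables one engine rather than breaking ``import xarray``. The pattern in isolation, with a hypothetical dependency ``mylib`` and a local stand-in for the registry:

    from typing import Dict

    BACKEND_ENTRYPOINTS: Dict[str, object] = {}  # stand-in for the shared registry

    try:
        import mylib  # hypothetical optional dependency

        has_mylib = True
    except ModuleNotFoundError:
        has_mylib = False

    mylib_backend = object()  # stand-in for a BackendEntrypoint instance

    # Register only when the import succeeded.
    if has_mylib:
        BACKEND_ENTRYPOINTS["mylib"] = mylib_backend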
4 changes: 4 additions & 0 deletions xarray/backends/common.py
@@ -1,6 +1,7 @@
import logging
import time
import traceback
from typing import Dict

import numpy as np

@@ -349,3 +350,6 @@ def __init__(self, open_dataset, open_dataset_parameters=None, guess_can_open=No
self.open_dataset = open_dataset
self.open_dataset_parameters = open_dataset_parameters
self.guess_can_open = guess_can_open


BACKEND_ENTRYPOINTS: Dict[str, BackendEntrypoint] = {}
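
With the registry living in ``common.py``, adding an engine is a single dict assignment against the ``BackendEntrypoint`` container defined just above. A hedged sketch against this commit; the ``demo`` engine and its opener are invented for illustration:

    from xarray.backends.common import BACKEND_ENTRYPOINTS, BackendEntrypoint

    def open_backend_dataset_demo(filename_or_obj, *, drop_variables=None):
        # A real opener would return a decoded xarray.Dataset.
        raise NotImplementedError

    demo_backend = BackendEntrypoint(open_dataset=open_backend_dataset_demo)
    BACKEND_ENTRYPOINTS["demo"] = demo_backend  # same move the built-ins make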
20 changes: 16 additions & 4 deletions xarray/backends/h5netcdf_.py
@@ -8,7 +8,12 @@
from ..core import indexing
from ..core.utils import FrozenDict, is_remote_uri, read_magic_number
from ..core.variable import Variable
from .common import BackendEntrypoint, WritableCFDataStore, find_root_and_group
from .common import (
BACKEND_ENTRYPOINTS,
BackendEntrypoint,
WritableCFDataStore,
find_root_and_group,
)
from .file_manager import CachingFileManager, DummyFileManager
from .locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock
from .netCDF4_ import (
@@ -20,6 +25,13 @@
)
from .store import open_backend_dataset_store

try:
import h5netcdf

has_h5netcdf = True
except ModuleNotFoundError:
has_h5netcdf = False


class H5NetCDFArrayWrapper(BaseNetCDF4Array):
def get_array(self, needs_lock=True):
@@ -85,8 +97,6 @@ class H5NetCDFStore(WritableCFDataStore):

def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):

import h5netcdf

if isinstance(manager, (h5netcdf.File, h5netcdf.Group)):
if group is None:
root, group = find_root_and_group(manager)
@@ -122,7 +132,6 @@ def open(
invalid_netcdf=None,
phony_dims=None,
):
import h5netcdf

if isinstance(filename, bytes):
raise ValueError(
@@ -375,3 +384,6 @@ def open_backend_dataset_h5netcdf(
h5netcdf_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_h5netcdf, guess_can_open=guess_can_open_h5netcdf
)

if has_h5netcdf:
BACKEND_ENTRYPOINTS["h5netcdf"] = h5netcdf_backend
15 changes: 13 additions & 2 deletions xarray/backends/netCDF4_.py
@@ -12,6 +12,7 @@
from ..core.utils import FrozenDict, close_on_error, is_remote_uri
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
BackendArray,
BackendEntrypoint,
WritableCFDataStore,
@@ -23,6 +24,14 @@
from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable
from .store import open_backend_dataset_store

try:
import netCDF4

has_netcdf4 = True
except ModuleNotFoundError:
has_netcdf4 = False


# This lookup table maps from dtype.byteorder to a readable endian
# string used by netCDF4.
_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": "native"}
@@ -298,7 +307,6 @@ class NetCDF4DataStore(WritableCFDataStore):
def __init__(
self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False
):
import netCDF4

if isinstance(manager, netCDF4.Dataset):
if group is None:
@@ -335,7 +343,6 @@ def open(
lock_maker=None,
autoclose=False,
):
import netCDF4

if isinstance(filename, pathlib.Path):
filename = os.fspath(filename)
@@ -563,3 +570,7 @@ def open_backend_dataset_netcdf4(
netcdf4_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_netcdf4, guess_can_open=guess_can_open_netcdf4
)


if has_netcdf4:
BACKEND_ENTRYPOINTS["netcdf4"] = netcdf4_backend
24 changes: 1 addition & 23 deletions xarray/backends/plugins.py
@@ -2,33 +2,11 @@
import inspect
import itertools
import logging
import typing as T
import warnings

import pkg_resources

from .cfgrib_ import cfgrib_backend
from .common import BackendEntrypoint
from .h5netcdf_ import h5netcdf_backend
from .netCDF4_ import netcdf4_backend
from .pseudonetcdf_ import pseudonetcdf_backend
from .pydap_ import pydap_backend
from .pynio_ import pynio_backend
from .scipy_ import scipy_backend
from .store import store_backend
from .zarr import zarr_backend

BACKEND_ENTRYPOINTS: T.Dict[str, BackendEntrypoint] = {
"store": store_backend,
"netcdf4": netcdf4_backend,
"h5netcdf": h5netcdf_backend,
"scipy": scipy_backend,
"pseudonetcdf": pseudonetcdf_backend,
"zarr": zarr_backend,
"cfgrib": cfgrib_backend,
"pydap": pydap_backend,
"pynio": pynio_backend,
}
from .common import BACKEND_ENTRYPOINTS


def remove_duplicates(backend_entrypoints):
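
``plugins.py`` no longer needs to import every backend module just to build the table; it consumes whatever the backend modules registered when they were imported. Assuming ``import xarray`` still triggers those module imports at this commit, the available engines can be inspected like so (a sketch, not a documented API):

    import xarray  # imports the backend modules, which self-register

    from xarray.backends.common import BACKEND_ENTRYPOINTS

    # Contents depend on which optional libraries are installed; the keys
    # shown are examples, not a guaranteed set.
    print(sorted(BACKEND_ENTRYPOINTS))  # e.g. ['cfgrib', 'h5netcdf', ...]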
20 changes: 18 additions & 2 deletions xarray/backends/pseudonetcdf_.py
@@ -3,11 +3,24 @@
from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.variable import Variable
from .common import AbstractDataStore, BackendArray, BackendEntrypoint
from .common import (
BACKEND_ENTRYPOINTS,
AbstractDataStore,
BackendArray,
BackendEntrypoint,
)
from .file_manager import CachingFileManager
from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock
from .store import open_backend_dataset_store

try:
from PseudoNetCDF import pncopen

has_pseudonetcdf = True
except ModuleNotFoundError:
has_pseudonetcdf = False


# pseudonetcdf can invoke netCDF libraries internally
PNETCDF_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK])

@@ -40,7 +53,6 @@ class PseudoNetCDFDataStore(AbstractDataStore):

@classmethod
def open(cls, filename, lock=None, mode=None, **format_kwargs):
from PseudoNetCDF import pncopen

keywords = {"kwargs": format_kwargs}
# only include mode if explicitly passed
@@ -138,3 +150,7 @@ def open_backend_dataset_pseudonetcdf(
open_dataset=open_backend_dataset_pseudonetcdf,
open_dataset_parameters=open_dataset_parameters,
)


if has_pseudonetcdf:
BACKEND_ENTRYPOINTS["pseudonetcdf"] = pseudonetcdf_backend
20 changes: 18 additions & 2 deletions xarray/backends/pydap_.py
@@ -4,9 +4,22 @@
from ..core.pycompat import integer_types
from ..core.utils import Frozen, FrozenDict, close_on_error, is_dict_like, is_remote_uri
from ..core.variable import Variable
from .common import AbstractDataStore, BackendArray, BackendEntrypoint, robust_getitem
from .common import (
BACKEND_ENTRYPOINTS,
AbstractDataStore,
BackendArray,
BackendEntrypoint,
robust_getitem,
)
from .store import open_backend_dataset_store

try:
import pydap.client

has_pydap = True
except ModuleNotFoundError:
has_pydap = False


class PydapArrayWrapper(BackendArray):
def __init__(self, array):
@@ -74,7 +87,6 @@ def __init__(self, ds):

@classmethod
def open(cls, url, session=None):
import pydap.client

ds = pydap.client.open_url(url, session=session)
return cls(ds)
@@ -133,3 +145,7 @@ def open_backend_dataset_pydap(
pydap_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_pydap, guess_can_open=guess_can_open_pydap
)


if has_pydap:
BACKEND_ENTRYPOINTS["pydap"] = pydap_backend