forked from pydata/xarray

Merge branch 'master' into join-override
* master:
  pyupgrade one-off run (pydata#3190)
  mfdataset, concat now support the 'join' kwarg. (pydata#3102)
  reduce the size of example dataset in dask docs (pydata#3187)
  add climpred to related-projects (pydata#3188)
  bump rasterio to 1.0.24 in doc building environment (pydata#3186)
  More annotations (pydata#3177)
  Support for __array_function__ implementers (sparse arrays) [WIP] (pydata#3117)
  Internal clean-up of isnull() to avoid relying on pandas (pydata#3132)
  Call darray.compute() in plot() (pydata#3183)
  BUG: fix + test open_mfdataset fails on variable attributes with list… (pydata#3181)
dcherian committed Aug 7, 2019
2 parents a26b1a5 + 8a9c471 commit f62ca52
Showing 48 changed files with 1,488 additions and 317 deletions.
1 change: 1 addition & 0 deletions ci/requirements/py37.yml
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ dependencies:
- pip
- scipy
- seaborn
- sparse
- toolz
- rasterio
- boto3
6 changes: 3 additions & 3 deletions doc/dask.rst
@@ -58,9 +58,9 @@ argument to :py:func:`~xarray.open_dataset` or using the
np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3)
ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'),
np.random.randn(365, 180, 360)),
'time': pd.date_range('2015-01-01', periods=365),
'longitude': np.arange(360),
np.random.randn(30, 180, 180)),
'time': pd.date_range('2015-01-01', periods=30),
'longitude': np.arange(180),
'latitude': np.arange(89.5, -90.5, -1)})
ds.to_netcdf('example-data.nc')
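The dask-docs example dataset shrinks here from shape ``(365, 180, 360)`` to ``(30, 180, 180)``, roughly a 24x reduction. A quick back-of-the-envelope check, assuming 8-byte floats:

```python
# Size of the dask-docs example dataset before and after this change,
# assuming float64 (8 bytes per element).
old_bytes = 365 * 180 * 360 * 8   # (time, latitude, longitude)
new_bytes = 30 * 180 * 180 * 8

print(old_bytes / 1e6, "MB ->", new_bytes / 1e6, "MB")
```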
2 changes: 1 addition & 1 deletion doc/environment.yml
@@ -12,7 +12,7 @@ dependencies:
- ipython=7.2.0
- netCDF4=1.4.2
- cartopy=0.17.0
- rasterio=1.0.13
- rasterio=1.0.24
- zarr=2.2.0
- iris=2.2.0
- flake8=3.6.0
1 change: 1 addition & 0 deletions doc/related-projects.rst
@@ -11,6 +11,7 @@ Geosciences
~~~~~~~~~~~

- `aospy <https://aospy.readthedocs.io>`_: Automated analysis and management of gridded climate data.
- `climpred <https://climpred.readthedocs.io>`_: Analysis of ensemble forecast models for climate prediction.
- `infinite-diff <https://github.com/spencerahill/infinite-diff>`_: xarray-based finite-differencing, focused on gridded climate/meteorology data
- `marc_analysis <https://github.com/darothen/marc_analysis>`_: Analysis package for CESM/MARC experiments and output.
- `MetPy <https://unidata.github.io/MetPy/dev/index.html>`_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data.
12 changes: 8 additions & 4 deletions doc/whats-new.rst
@@ -40,6 +40,8 @@ New functions/methods
Enhancements
~~~~~~~~~~~~

- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg.
It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian <https://github.com/dcherian>`_.
- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
``append_dim`` is set, as it will automatically be set to ``'a'`` internally.
By `David Brochart <https://github.com/davidbrochart>`_.
@@ -61,8 +63,12 @@ Bug fixes
By `Tom Nicholas <http://github.com/TomNicholas>`_.
- Fixed crash when applying ``distributed.Client.compute()`` to a DataArray
(:issue:`3171`). By `Guido Imperiale <https://github.com/crusaderky>`_.


- Better error message when using groupby on an empty DataArray (:issue:`3037`).
By `Hasan Ahmad <https://github.com/HasanAhmadQ7>`_.
- Fix error that arises when using ``open_mfdataset`` on a series of netCDF files
  having differing values for a variable attribute of type list (:issue:`3034`).
  By `Hasan Ahmad <https://github.com/HasanAhmadQ7>`_.

.. _whats-new.0.12.3:

v0.12.3 (10 July 2019)
@@ -103,8 +109,6 @@ Bug fixes
- Fix HDF5 error that could arise when reading multiple groups from a file at
once (:issue:`2954`).
By `Stephan Hoyer <https://github.com/shoyer>`_.
- Better error message when using groupby on an empty DataArray (:issue:`3037`).
By `Hasan Ahmad <https://github.com/HasanAhmadQ7>`_.

.. _whats-new.0.12.2:

2 changes: 0 additions & 2 deletions properties/test_encode_decode.py
@@ -4,8 +4,6 @@
These ones pass, just as you'd hope!
"""
from __future__ import absolute_import, division, print_function

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
from hypothesis import given, settings
2 changes: 2 additions & 0 deletions setup.cfg
@@ -83,6 +83,8 @@ ignore_missing_imports = True
ignore_missing_imports = True
[mypy-seaborn.*]
ignore_missing_imports = True
[mypy-sparse.*]
ignore_missing_imports = True
[mypy-toolz.*]
ignore_missing_imports = True
[mypy-zarr.*]
23 changes: 11 additions & 12 deletions versioneer.py
@@ -398,7 +398,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
stderr=(subprocess.PIPE if hide_stderr
else None))
break
except EnvironmentError:
except OSError:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT:
continue
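The ``EnvironmentError`` → ``OSError`` substitutions in this hunk (and the matching ones below) are behavior-preserving: since Python 3.3, ``EnvironmentError`` and ``IOError`` are aliases of ``OSError``, so the rewritten ``except`` clauses catch exactly the same exceptions:

```python
# On Python 3.3+, the legacy exception names are mere aliases of OSError,
# so `except OSError` is equivalent to `except EnvironmentError`.
assert OSError is EnvironmentError
assert OSError is IOError

caught = False
try:
    open('no-such-file-f62ca52.tmp')  # raises FileNotFoundError, an OSError subclass
except EnvironmentError as e:
    caught = isinstance(e, OSError)
assert caught
```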
@@ -421,7 +421,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
return stdout, p.returncode


LONG_VERSION_PY['git'] = '''
LONG_VERSION_PY['git'] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
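Switching ``LONG_VERSION_PY['git']`` to a raw string (``r'''``) keeps backslashes in the embedded template (e.g. regex fragments) from being interpreted as escape sequences. A small illustration of the difference:

```python
# In a normal string, recognized escapes are translated;
# in a raw string the backslash survives literally.
assert len('\n') == 1       # one newline character
assert len(r'\n') == 2      # backslash + 'n'
assert r'\d' == '\\d'       # raw string spells a literal backslash-d
```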
@@ -968,7 +968,7 @@ def git_get_keywords(versionfile_abs):
if mo:
keywords["date"] = mo.group(1)
f.close()
except EnvironmentError:
except OSError:
pass
return keywords

@@ -992,11 +992,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -1005,7 +1005,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
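These hunks are pure pyupgrade rewrites: ``set([... for ...])`` becomes a set comprehension and the results are identical. A quick demonstration on the same pattern (the ``refnames`` value here is made up for illustration):

```python
refnames = "(HEAD -> master, tag: v0.12.3, origin/master)"

# old spelling
refs_old = set([r.strip() for r in refnames.strip("()").split(",")])
# new spelling, as rewritten by pyupgrade
refs_new = {r.strip() for r in refnames.strip("()").split(",")}
assert refs_old == refs_new

# the "tag: " filtering step from the hunk above, on the new spelling
TAG = "tag: "
tags = {r[len(TAG):] for r in refs_new if r.startswith(TAG)}
assert tags == {'v0.12.3'}
```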
@@ -1148,7 +1148,7 @@ def do_vcs_install(manifest_in, versionfile_source, ipy):
if "export-subst" in line.strip().split()[1:]:
present = True
f.close()
except EnvironmentError:
except OSError:
pass
if not present:
f = open(".gitattributes", "a+")
@@ -1206,7 +1206,7 @@ def versions_from_file(filename):
try:
with open(filename) as f:
contents = f.read()
except EnvironmentError:
except OSError:
raise NotThisMethod("unable to read _version.py")
mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON",
contents, re.M | re.S)
@@ -1702,8 +1702,7 @@ def do_setup():
root = get_root()
try:
cfg = get_config_from_root(root)
except (EnvironmentError, configparser.NoSectionError,
configparser.NoOptionError) as e:
except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e:
if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
print("Adding sample versioneer config to setup.cfg",
file=sys.stderr)
@@ -1728,7 +1727,7 @@ def do_setup():
try:
with open(ipy, "r") as f:
old = f.read()
except EnvironmentError:
except OSError:
old = ""
if INIT_PY_SNIPPET not in old:
print(" appending to %s" % ipy)
@@ -1752,7 +1751,7 @@ def do_setup():
if line.startswith("include "):
for include in line.split()[1:]:
simple_includes.add(include)
except EnvironmentError:
except OSError:
pass
# That doesn't cover everything MANIFEST.in can do
# (http://docs.python.org/2/distutils/sourcedist.html#commands), so
10 changes: 5 additions & 5 deletions xarray/_version.py
@@ -81,7 +81,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
stderr=(subprocess.PIPE if hide_stderr
else None))
break
except EnvironmentError:
except OSError:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT:
continue
@@ -153,7 +153,7 @@ def git_get_keywords(versionfile_abs):
if mo:
keywords["date"] = mo.group(1)
f.close()
except EnvironmentError:
except OSError:
pass
return keywords

@@ -177,11 +177,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -190,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
24 changes: 18 additions & 6 deletions xarray/backends/api.py
@@ -609,7 +609,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
compat='no_conflicts', preprocess=None, engine=None,
lock=None, data_vars='all', coords='different',
combine='_old_auto', autoclose=None, parallel=False,
**kwargs):
join='outer', **kwargs):
"""Open multiple files as a single dataset.
If combine='by_coords' then the function ``combine_by_coords`` is used to
@@ -704,6 +704,16 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
parallel : bool, optional
If True, the open and preprocess steps of this function will be
performed in parallel using ``dask.delayed``. Default is False.
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
String indicating how to combine differing indexes
(excluding concat_dim) in objects
- 'outer': use the union of object indexes
- 'inner': use the intersection of object indexes
- 'left': use indexes from the first object with each dimension
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
**kwargs : optional
Additional arguments passed on to :py:func:`xarray.open_dataset`.
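The five ``join`` modes documented above mirror :py:func:`~xarray.align`, to which the kwarg is passed down. A minimal pure-Python model of how differing 1-d indexes are combined under each mode (a sketch of the semantics, not xarray's actual implementation):

```python
def join_indexes(indexes, join='outer'):
    """Toy model of how xarray.align combines differing 1-d indexes."""
    first = list(indexes[0])
    if join == 'outer':                     # union of all object indexes
        return sorted(set().union(*indexes))
    if join == 'inner':                     # intersection of all object indexes
        return sorted(set(first).intersection(*map(set, indexes[1:])))
    if join == 'left':                      # indexes from the first object
        return first
    if join == 'right':                     # indexes from the last object
        return list(indexes[-1])
    if join == 'exact':                     # refuse to align unequal indexes
        if any(list(idx) != first for idx in indexes[1:]):
            raise ValueError('cannot align: indexes are not equal')
        return first
    raise ValueError('invalid join: %r' % join)

assert join_indexes([[0, 1, 2], [1, 2, 3]], 'outer') == [0, 1, 2, 3]
assert join_indexes([[0, 1, 2], [1, 2, 3]], 'inner') == [1, 2]
```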
@@ -742,7 +752,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
paths = [str(p) if isinstance(p, Path) else p for p in paths]

if not paths:
raise IOError('no files to open')
raise OSError('no files to open')

# If combine='by_coords' then this is unnecessary, but quick.
# If combine='nested' then this creates a flat list which is easier to
@@ -798,18 +808,20 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',

combined = auto_combine(datasets, concat_dim=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords, from_openmfds=True)
coords=coords, join=join,
from_openmfds=True)
elif combine == 'nested':
# Combined nested list by successive concat and merge operations
# along each dimension, using structure given by "ids"
combined = _nested_combine(datasets, concat_dims=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords, ids=ids)
coords=coords, ids=ids, join=join)
elif combine == 'by_coords':
# Redo ordering from coordinates, ignoring how they were ordered
# previously
combined = combine_by_coords(datasets, compat=compat,
data_vars=data_vars, coords=coords)
data_vars=data_vars, coords=coords,
join=join)
else:
raise ValueError("{} is an invalid option for the keyword argument"
" ``combine``".format(combine))
@@ -1039,7 +1051,7 @@ def save_mfdataset(datasets, paths, mode='w', format=None, groups=None,
if groups is None:
groups = [None] * len(datasets)

if len(set([len(datasets), len(paths), len(groups)])) > 1:
if len({len(datasets), len(paths), len(groups)}) > 1:
raise ValueError('must supply lists of the same length for the '
'datasets, paths and groups arguments to '
'save_mfdataset')
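The rewritten length check relies on a small set trick: collapsing the three lengths into a set leaves exactly one element when all lengths agree. A sketch with made-up arguments:

```python
datasets = ['ds1', 'ds2', 'ds3']          # stand-ins for Dataset objects
paths = ['a.nc', 'b.nc', 'c.nc']
groups = [None, None, None]

# One distinct length -> the three lists line up; more than one -> mismatch.
assert len({len(datasets), len(paths), len(groups)}) == 1

paths_bad = ['a.nc', 'b.nc']
assert len({len(datasets), len(paths_bad), len(groups)}) > 1
```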
12 changes: 7 additions & 5 deletions xarray/backends/netCDF4_.py
@@ -138,7 +138,7 @@ def _netcdf4_create_group(dataset, name):


def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in set([None, '', '/']):
if group in {None, '', '/'}:
# use the root group
return ds
else:
@@ -155,7 +155,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
ds = create_group(ds, key)
else:
# wrap error to provide slightly more helpful message
raise IOError('group not found: %s' % key, e)
raise OSError('group not found: %s' % key, e)
return ds


@@ -195,9 +195,11 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,

encoding = variable.encoding.copy()

safe_to_drop = set(['source', 'original_shape'])
valid_encodings = set(['zlib', 'complevel', 'fletcher32', 'contiguous',
'chunksizes', 'shuffle', '_FillValue', 'dtype'])
safe_to_drop = {'source', 'original_shape'}
valid_encodings = {
'zlib', 'complevel', 'fletcher32', 'contiguous',
'chunksizes', 'shuffle', '_FillValue', 'dtype'
}
if lsd_okay:
valid_encodings.add('least_significant_digit')
if h5py_okay:
9 changes: 5 additions & 4 deletions xarray/backends/netcdf3.py
@@ -11,9 +11,10 @@

# The following are reserved names in CDL and may not be used as names of
# variables, dimension, attributes
_reserved_names = set(['byte', 'char', 'short', 'ushort', 'int', 'uint',
'int64', 'uint64', 'float' 'real', 'double', 'bool',
'string'])
_reserved_names = {
'byte', 'char', 'short', 'ushort', 'int', 'uint', 'int64', 'uint64',
'float' 'real', 'double', 'bool', 'string'
}

# These data-types aren't supported by netCDF3, so they are automatically
# coerced instead as indicated by the "coerce_nc3_dtype" function
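One thing worth noting in the reserved-names set above (present both before and after this change): there is no comma between ``'float'`` and ``'real'``, and Python concatenates adjacent string literals at compile time, so the set contains a single entry ``'floatreal'`` rather than the two names ``'float'`` and ``'real'`` — which looks like a long-standing missing-comma bug rather than an intentional choice:

```python
# Adjacent string literals are concatenated at compile time.
assert 'float' 'real' == 'floatreal'

names = {'double', 'float' 'real', 'string'}
assert 'floatreal' in names
assert 'float' not in names
assert len(names) == 3     # three entries, not four
```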
@@ -108,4 +109,4 @@ def is_valid_nc3_name(s):
('/' not in s) and
(s[-1] != ' ') and
(_isalnumMUTF8(s[0]) or (s[0] == '_')) and
all((_isalnumMUTF8(c) or c in _specialchars for c in s)))
all(_isalnumMUTF8(c) or c in _specialchars for c in s))
14 changes: 7 additions & 7 deletions xarray/backends/pseudonetcdf_.py
@@ -75,18 +75,18 @@ def get_variables(self):
for k, v in self.ds.variables.items())

def get_attrs(self):
return Frozen(dict([(k, getattr(self.ds, k))
for k in self.ds.ncattrs()]))
return Frozen({k: getattr(self.ds, k) for k in self.ds.ncattrs()})

def get_dimensions(self):
return Frozen(self.ds.dimensions)

def get_encoding(self):
encoding = {}
encoding['unlimited_dims'] = set(
[k for k in self.ds.dimensions
if self.ds.dimensions[k].isunlimited()])
return encoding
return {
'unlimited_dims': {
k for k in self.ds.dimensions
if self.ds.dimensions[k].isunlimited()
}
}

def close(self):
self._manager.close()
10 changes: 6 additions & 4 deletions xarray/backends/pynio_.py
@@ -75,10 +75,12 @@ def get_dimensions(self):
return Frozen(self.ds.dimensions)

def get_encoding(self):
encoding = {}
encoding['unlimited_dims'] = set(
[k for k in self.ds.dimensions if self.ds.unlimited(k)])
return encoding
return {
'unlimited_dims': {
k for k in self.ds.dimensions
if self.ds.unlimited(k)
}
}

def close(self):
self._manager.close()