Commit 8ce7175
Merge remote-tracking branch 'upstream/master' into fix/4107
* upstream/master: (24 commits)
  Compatibility with dask 2021.02.0 (pydata#4884)
  Ensure maximum accuracy when encoding and decoding cftime.datetime values (pydata#4758)
  Fix `bounds_error=True` ignored with 1D interpolation (pydata#4855)
  add a drop_conflicts strategy for merging attrs (pydata#4827)
  update pre-commit hooks (mypy) (pydata#4883)
  ensure warnings cannot become errors in assert_ (pydata#4864)
  update pre-commit hooks (pydata#4874)
  small fixes for the docstrings of swap_dims and integrate (pydata#4867)
  Modify _encode_datetime_with_cftime for compatibility with cftime > 1.4.0 (pydata#4871)
  vélin (pydata#4872)
  don't skip the doctests CI (pydata#4869)
  fix da.pad example for numpy 1.20 (pydata#4865)
  temporarily pin dask (pydata#4873)
  Add units if "unit" is in the attrs. (pydata#4850)
  speed up the repr for big MultiIndex objects (pydata#4846)
  dim -> coord in DataArray.integrate (pydata#3993)
  WIP: backend interface, now it uses subclassing  (pydata#4836)
  weighted: small improvements (pydata#4818)
  Update related-projects.rst (pydata#4844)
  iris update doc url (pydata#4845)
  ...
dcherian committed Feb 12, 2021
2 parents 1a1f431 + 2a34bfb commit 8ce7175
Showing 69 changed files with 1,483 additions and 892 deletions.
8 changes: 0 additions & 8 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -5,11 +5,3 @@
 - [ ] Passes `pre-commit run --all-files`
 - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst`
 - [ ] New functions/methods are listed in `api.rst`
-
-
-<sub>
-<h3>
-Overriding CI behaviors
-</h3>
-By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a <tt>[test-upstream]</tt> tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a <tt>[skip-ci]</tt> tag to the first line of the commit message
-</sub>
2 changes: 0 additions & 2 deletions .github/workflows/ci-additional.yaml
@@ -121,8 +121,6 @@ jobs:
   doctest:
     name: Doctests
     runs-on: "ubuntu-latest"
-    needs: detect-ci-trigger
-    if: needs.detect-ci-trigger.outputs.triggered == 'false'
     defaults:
       run:
         shell: bash -l {0}
9 changes: 7 additions & 2 deletions .pre-commit-config.yaml
@@ -17,15 +17,20 @@ repos:
     hooks:
       - id: black
   - repo: https://github.com/keewis/blackdoc
-    rev: v0.3.2
+    rev: v0.3.3
     hooks:
       - id: blackdoc
   - repo: https://gitlab.com/pycqa/flake8
     rev: 3.8.4
     hooks:
       - id: flake8
+  # - repo: https://github.com/Carreau/velin
+  #   rev: 0.0.8
+  #   hooks:
+  #     - id: velin
+  #       args: ["--write", "--compact"]
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.790  # Must match ci/requirements/*.yml
+    rev: v0.800
     hooks:
       - id: mypy
         exclude: "properties|asv_bench"
18 changes: 18 additions & 0 deletions asv_bench/benchmarks/repr.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+import xarray as xr
+
+
+class ReprMultiIndex:
+    def setup(self, key):
+        index = pd.MultiIndex.from_product(
+            [range(10000), range(10000)], names=("level_0", "level_1")
+        )
+        series = pd.Series(range(100000000), index=index)
+        self.da = xr.DataArray(series)
+
+    def time_repr(self):
+        repr(self.da)
+
+    def time_repr_html(self):
+        self.da._repr_html_()
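
This new benchmark follows airspeed velocity's conventions: ``setup`` builds the object once, and asv then times each ``time_*`` method. For orientation, a rough sketch of the operations being timed (sizes shrunk here so it runs quickly; the benchmark itself uses a 10000 x 10000 product index):

    import pandas as pd

    import xarray as xr

    # Build a DataArray backed by a pandas MultiIndex, as the benchmark does.
    index = pd.MultiIndex.from_product(
        [range(100), range(100)], names=("level_0", "level_1")
    )
    da = xr.DataArray(pd.Series(range(100 * 100), index=index))

    repr(da)  # the plain-text repr that pydata#4846 speeds up
    da._repr_html_()  # the notebook repr exercised by time_repr_html
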
15 changes: 10 additions & 5 deletions asv_bench/benchmarks/unstacking.py
@@ -7,18 +7,23 @@
 
 
 class Unstacking:
     def setup(self):
-        data = np.random.RandomState(0).randn(1, 1000, 500)
-        self.ds = xr.DataArray(data).stack(flat_dim=["dim_1", "dim_2"])
+        data = np.random.RandomState(0).randn(500, 1000)
+        self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...])
+        self.da_missing = self.da_full[:-1]
+        self.df_missing = self.da_missing.to_pandas()
 
     def time_unstack_fast(self):
-        self.ds.unstack("flat_dim")
+        self.da_full.unstack("flat_dim")
 
     def time_unstack_slow(self):
-        self.ds[:, ::-1].unstack("flat_dim")
+        self.da_missing.unstack("flat_dim")
 
+    def time_unstack_pandas_slow(self):
+        self.df_missing.unstack()
+
 
 class UnstackingDask(Unstacking):
     def setup(self, *args, **kwargs):
         requires_dask()
         super().setup(**kwargs)
-        self.ds = self.ds.chunk({"flat_dim": 50})
+        self.da_full = self.da_full.chunk({"flat_dim": 50})
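
A note on the new setup: ``stack(flat_dim=[...])`` passes a list containing ``...`` (Ellipsis), which tells ``stack`` to fold all remaining dimensions into the new one, so here it is equivalent to ``stack(flat_dim=["a", "b"])``; ``da_missing`` then drops the last element so that unstacking has to fill a hole, the slow path that pydata#4746 speeds up. A minimal sketch of the Ellipsis form:

    import numpy as np

    import xarray as xr

    da = xr.DataArray(np.arange(6).reshape(2, 3), dims=list("ab"))
    stacked = da.stack(flat_dim=[...])  # same as stack(flat_dim=["a", "b"])
    print(stacked.sizes)  # Frozen({'flat_dim': 6})
    stacked.unstack("flat_dim")  # round-trips back to dims ("a", "b")
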
4 changes: 0 additions & 4 deletions ci/requirements/py37-min-all-deps.yml
@@ -8,7 +8,6 @@ dependencies:
   # When upgrading python, numpy, or pandas, must also change
   # doc/installing.rst and setup.py.
   - python=3.7
-  - black
   - boto3=1.9
   - bottleneck=1.2
   - cartopy=0.17
@@ -18,16 +17,13 @@
   - coveralls
   - dask=2.9
   - distributed=2.9
-  - flake8
   - h5netcdf=0.7
   - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest
   - hdf5=1.10
   - hypothesis
   - iris=2.2
-  - isort
   - lxml=4.4 # Optional dep of pydap
   - matplotlib-base=3.1
-  - mypy=0.782 # Must match .pre-commit-config.yaml
   - nc-time-axis=1.2
   - netcdf4=1.4
   - numba=0.46
4 changes: 0 additions & 4 deletions ci/requirements/py38-all-but-dask.yml
@@ -4,23 +4,19 @@ channels:
   - nodefaults
 dependencies:
   - python=3.8
-  - black
   - boto3
   - bottleneck
   - cartopy
   - cdms2
   - cfgrib
   - cftime
   - coveralls
-  - flake8
   - h5netcdf
   - h5py=2
   - hdf5
   - hypothesis
-  - isort
   - lxml # Optional dep of pydap
   - matplotlib-base
-  - mypy=0.790 # Must match .pre-commit-config.yaml
   - nc-time-axis
   - netcdf4
   - numba
2 changes: 1 addition & 1 deletion doc/conf.py
@@ -411,7 +411,7 @@
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
-    "iris": ("https://scitools.org.uk/iris/docs/latest", None),
+    "iris": ("https://scitools-iris.readthedocs.io/en/latest", None),
     "numpy": ("https://numpy.org/doc/stable", None),
     "scipy": ("https://docs.scipy.org/doc/scipy/reference", None),
     "numba": ("https://numba.pydata.org/numba-doc/latest", None),
1 change: 1 addition & 0 deletions doc/contributing.rst
@@ -836,6 +836,7 @@ PR checklist
 - Write new tests if needed. See `"Test-driven development/code writing" <https://xarray.pydata.org/en/stable/contributing.html#test-driven-development-code-writing>`_.
 - Test the code using `Pytest <http://doc.pytest.org/en/latest/>`_. Running all tests (type ``pytest`` in the root directory) takes a while, so feel free to only run the tests you think are needed based on your PR (example: ``pytest xarray/tests/test_dataarray.py``). CI will catch any failing tests.
+- By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a ``[test-upstream]`` tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a ``[skip-ci]`` tag to the first line of the commit message.
 - **Properly format your code** and verify that it passes the formatting guidelines set by `Black <https://black.readthedocs.io/en/stable/>`_ and `Flake8 <http://flake8.pycqa.org/en/latest/>`_. See `"Code formatting" <https://xarray.pydata.org/en/stable/contributing.html#code-formatting>`_. You can use `pre-commit <https://pre-commit.com/>`_ to run these automatically on each commit.
2 changes: 1 addition & 1 deletion doc/faq.rst
@@ -166,7 +166,7 @@ different approaches to handling metadata: Iris strictly interprets
 `CF conventions`_. Iris particularly shines at mapping, thanks to its
 integration with Cartopy_.
 
-.. _Iris: http://scitools.org.uk/iris/
+.. _Iris: https://scitools-iris.readthedocs.io/en/stable/
 .. _Cartopy: http://scitools.org.uk/cartopy/docs/latest/
 
 `UV-CDAT`__ is another Python library that implements in-memory netCDF-like
1 change: 1 addition & 0 deletions doc/related-projects.rst
@@ -15,6 +15,7 @@ Geosciences
 - `aospy <https://aospy.readthedocs.io>`_: Automated analysis and management of gridded climate data.
 - `climpred <https://climpred.readthedocs.io>`_: Analysis of ensemble forecast models for climate prediction.
 - `geocube <https://corteva.github.io/geocube>`_: Tool to convert geopandas vector data into rasterized xarray data.
+- `GeoWombat <https://github.com/jgrss/geowombat>`_: Utilities for analysis of remotely sensed and gridded raster data at scale (easily tame Landsat, Sentinel, Quickbird, and PlanetScope).
 - `infinite-diff <https://github.com/spencerahill/infinite-diff>`_: xarray-based finite-differencing, focused on gridded climate/meteorology data
 - `marc_analysis <https://github.com/darothen/marc_analysis>`_: Analysis package for CESM/MARC experiments and output.
 - `MetPy <https://unidata.github.io/MetPy/dev/index.html>`_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data.
38 changes: 36 additions & 2 deletions doc/whats-new.rst
@@ -17,7 +17,7 @@ What's New
.. _whats-new.0.16.3:

-v0.16.3 (unreleased)
+v0.17.0 (unreleased)
 --------------------

Breaking changes
@@ -42,11 +42,39 @@
 - remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull:`4725`).
   By `Aureliana Barghini <https://github.com/aurghs>`_.

+Deprecations
+~~~~~~~~~~~~
+
+- ``dim`` argument to :py:meth:`DataArray.integrate` is being deprecated in
+  favour of a ``coord`` argument, for consistency with :py:meth:`Dataset.integrate`.
+  For now using ``dim`` issues a ``FutureWarning``. It will be removed in
+  version 0.19.0 (:pull:`3993`).
+  By `Tom Nicholas <https://github.com/TomNicholas>`_.
+
+
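To illustrate the deprecation above, a sketch of the old and new spellings (toy data; the warning behavior is as described in the entry):

    import numpy as np

    import xarray as xr

    da = xr.DataArray(np.arange(4.0), dims="x", coords={"x": [0.0, 0.1, 1.1, 1.2]})

    da.integrate("x")  # positional usage is unchanged
    da.integrate(coord="x")  # new keyword, consistent with Dataset.integrate
    da.integrate(dim="x")  # still works for now, but issues a FutureWarning
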
New Features
~~~~~~~~~~~~
+- Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O
+  roundtripping of ``cftime.datetime`` objects (:pull:`4758`).
+  By `Spencer Clark <https://github.com/spencerkclark>`_.
+- :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support
+  millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies
+  for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`4758`).
+  By `Spencer Clark <https://github.com/spencerkclark>`_.
+- Significantly higher ``unstack`` performance on numpy-backed arrays which
+  contain missing values; 8x faster in our benchmark, and 2x faster than pandas
+  (:pull:`4746`).
+  By `Maximilian Roos <https://github.com/max-sixty>`_.

 - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables.
   By `Deepak Cherian <https://github.com/dcherian>`_.
- add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg
(:issue:`4749`, :pull:`4827`).
By `Justus Magin <https://github.com/keewis>`_.
By `Deepak Cherian <https://github.com/dcherian>`_.
 - :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims
   in the form of kwargs as well as a dict, like most similar methods.
   By `Maximilian Roos <https://github.com/max-sixty>`_.
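
For the cftime entries above, a sketch of the newly supported sub-second frequencies (requires cftime >= 1.4.1 as noted; the calendar choice is illustrative):

    import xarray as xr

    # Millisecond ("L"/"ms") and microsecond ("U"/"us") frequencies now work
    # with cftime.datetime coordinates.
    times = xr.cftime_range("2000-01-01", periods=4, freq="500L", calendar="noleap")
    print(times)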

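Likewise, a sketch of the two smaller additions listed above, ``combine_attrs="drop_conflicts"`` and kwarg-style ``swap_dims`` (toy data):

    import xarray as xr

    a = xr.Dataset(attrs={"units": "m", "source": "model"})
    b = xr.Dataset(attrs={"units": "km", "source": "model"})
    merged = xr.merge([a, b], combine_attrs="drop_conflicts")
    print(merged.attrs)  # {'source': 'model'}; the conflicting 'units' is dropped

    ds = xr.Dataset({"y": ("x", [1, 2, 3])}, coords={"z": ("x", list("abc"))})
    ds.swap_dims({"x": "z"})  # dict form, as before
    ds.swap_dims(x="z")  # new: kwargs form
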
Bug fixes
~~~~~~~~~
@@ -83,6 +111,10 @@ Bug fixes
 - Convert to IndexVariable or Variable during renaming as appropriate. (:issue:`4107`, :issue:`4417`, :pull:`4108`)
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo <https://github.com/mesejo>`_.
+- Ensure that :py:meth:`Dataset.interp` raises ``ValueError`` when interpolating outside coordinate range and ``bounds_error=True`` (:issue:`4854`, :pull:`4855`).
+  By `Leif Denby <https://github.com/leifdenby>`_.
+- Fix time encoding bug associated with using cftime versions greater than
+  1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark <https://github.com/spencerkclark>`_.

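Two of the bug-fix entries above are easiest to see in code. A sketch (toy data; requires scipy for the interpolation, and exact error messages may differ):

    import numpy as np

    import xarray as xr

    ds = xr.Dataset({"y": ("x", np.arange(4.0))}, coords={"x": np.arange(4.0)})

    # drop_isel: positional-index counterpart to drop_sel (pydata#4819).
    ds.drop_isel(x=[0, 1])  # drops the first two points along x

    # interp: bounds_error=True used to be silently ignored for 1D
    # interpolation; it now raises as requested (pydata#4855).
    try:
        ds.interp(x=[3.5], kwargs={"bounds_error": True})
    except ValueError as err:
        print("out-of-range interpolation rejected:", err)
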
Documentation
~~~~~~~~~~~~~
@@ -113,6 +145,8 @@ Internal Changes
   in ipython (:issue:`4741`, :pull:`4742`). By `Richard Kleijn <https://github.com/rhkleijn>`_.
 - Added the ``set_close`` method to ``Dataset`` and ``DataArray`` for backends to specify how to voluntarily release
   all resources. (:pull:`4809`). By `Alessandro Amici <https://github.com/alexamici>`_.
+- Ensure warnings cannot be turned into exceptions in :py:func:`testing.assert_equal` and
+  the other ``assert_*`` functions (:pull:`4864`). By `Mathias Hauser <https://github.com/mathause>`_.

.. _whats-new.0.16.2:

Expand All @@ -129,7 +163,7 @@ Deprecations

 - :py:attr:`~core.accessor_dt.DatetimeAccessor.weekofyear` and :py:attr:`~core.accessor_dt.DatetimeAccessor.week`
   have been deprecated. Use ``DataArray.dt.isocalendar().week``
-  instead (:pull:`4534`). By `Mathias Hauser <https://github.com/mathause>`_,
+  instead (:pull:`4534`). By `Mathias Hauser <https://github.com/mathause>`_.
   `Maximilian Roos <https://github.com/max-sixty>`_, and `Spencer Clark <https://github.com/spencerkclark>`_.
 - :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` no longer support passing ``keep_attrs``
   via its constructor. Pass ``keep_attrs`` via the applied function, i.e. use
2 changes: 2 additions & 0 deletions setup.cfg
@@ -164,6 +164,8 @@ force_to_top = true
 default_section = THIRDPARTY
 known_first_party = xarray
 
+[mypy]
+
 # Most of the numerical computing stack doesn't have type annotations yet.
 [mypy-affine.*]
 ignore_missing_imports = True
106 changes: 51 additions & 55 deletions xarray/backends/cfgrib_.py
@@ -12,7 +12,7 @@
     BackendEntrypoint,
 )
 from .locks import SerializableLock, ensure_lock
-from .store import open_backend_dataset_store
+from .store import StoreBackendEntrypoint
 
 try:
     import cfgrib
@@ -86,62 +86,58 @@ def get_encoding(self):
         return encoding
 
 
-def guess_can_open_cfgrib(store_spec):
-    try:
-        _, ext = os.path.splitext(store_spec)
-    except TypeError:
-        return False
-    return ext in {".grib", ".grib2", ".grb", ".grb2"}
-
-
-def open_backend_dataset_cfgrib(
-    filename_or_obj,
-    *,
-    mask_and_scale=True,
-    decode_times=None,
-    concat_characters=None,
-    decode_coords=None,
-    drop_variables=None,
-    use_cftime=None,
-    decode_timedelta=None,
-    lock=None,
-    indexpath="{path}.{short_hash}.idx",
-    filter_by_keys={},
-    read_keys=[],
-    encode_cf=("parameter", "time", "geography", "vertical"),
-    squeeze=True,
-    time_dims=("time", "step"),
-):
-
-    store = CfGribDataStore(
-        filename_or_obj,
-        indexpath=indexpath,
-        filter_by_keys=filter_by_keys,
-        read_keys=read_keys,
-        encode_cf=encode_cf,
-        squeeze=squeeze,
-        time_dims=time_dims,
-        lock=lock,
-    )
-
-    with close_on_error(store):
-        ds = open_backend_dataset_store(
-            store,
-            mask_and_scale=mask_and_scale,
-            decode_times=decode_times,
-            concat_characters=concat_characters,
-            decode_coords=decode_coords,
-            drop_variables=drop_variables,
-            use_cftime=use_cftime,
-            decode_timedelta=decode_timedelta,
-        )
-    return ds
-
-
-cfgrib_backend = BackendEntrypoint(
-    open_dataset=open_backend_dataset_cfgrib, guess_can_open=guess_can_open_cfgrib
-)
+class CfgribfBackendEntrypoint(BackendEntrypoint):
+    def guess_can_open(self, store_spec):
+        try:
+            _, ext = os.path.splitext(store_spec)
+        except TypeError:
+            return False
+        return ext in {".grib", ".grib2", ".grb", ".grb2"}
+
+    def open_dataset(
+        self,
+        filename_or_obj,
+        *,
+        mask_and_scale=True,
+        decode_times=None,
+        concat_characters=None,
+        decode_coords=None,
+        drop_variables=None,
+        use_cftime=None,
+        decode_timedelta=None,
+        lock=None,
+        indexpath="{path}.{short_hash}.idx",
+        filter_by_keys={},
+        read_keys=[],
+        encode_cf=("parameter", "time", "geography", "vertical"),
+        squeeze=True,
+        time_dims=("time", "step"),
+    ):
+
+        store = CfGribDataStore(
+            filename_or_obj,
+            indexpath=indexpath,
+            filter_by_keys=filter_by_keys,
+            read_keys=read_keys,
+            encode_cf=encode_cf,
+            squeeze=squeeze,
+            time_dims=time_dims,
+            lock=lock,
+        )
+
+        store_entrypoint = StoreBackendEntrypoint()
+        with close_on_error(store):
+            ds = store_entrypoint.open_dataset(
+                store,
+                mask_and_scale=mask_and_scale,
+                decode_times=decode_times,
+                concat_characters=concat_characters,
+                decode_coords=decode_coords,
+                drop_variables=drop_variables,
+                use_cftime=use_cftime,
+                decode_timedelta=decode_timedelta,
+            )
+        return ds
 
 
 if has_cfgrib:
-    BACKEND_ENTRYPOINTS["cfgrib"] = cfgrib_backend
+    BACKEND_ENTRYPOINTS["cfgrib"] = CfgribfBackendEntrypoint
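
The cfgrib change above is one piece of pydata#4836, which moves backends from module-level functions registered on a ``BackendEntrypoint`` instance to subclasses of ``BackendEntrypoint``. The interface is still marked WIP in that PR, so the following is an illustrative sketch only; apart from ``BackendEntrypoint``, ``BACKEND_ENTRYPOINTS``, and the two method names visible in this diff, every name here (including the import location and the ".myfmt" format) is hypothetical:

    from xarray.backends.common import BACKEND_ENTRYPOINTS, BackendEntrypoint


    class MyFormatBackendEntrypoint(BackendEntrypoint):
        def guess_can_open(self, store_spec):
            # Claim only paths with a made-up ".myfmt" extension.
            try:
                return str(store_spec).endswith(".myfmt")
            except TypeError:
                return False

        def open_dataset(self, filename_or_obj, *, drop_variables=None):
            # A real backend builds and returns an xarray.Dataset here,
            # typically by wrapping a data store as cfgrib does above.
            raise NotImplementedError


    BACKEND_ENTRYPOINTS["myfmt"] = MyFormatBackendEntrypoint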