Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vendorize skimage #1091

Merged
merged 7 commits into from
Mar 18, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 44 additions & 67 deletions conda-linux-64.lock

Large diffs are not rendered by default.

121 changes: 49 additions & 72 deletions conda-osx-64.lock

Large diffs are not rendered by default.

115 changes: 46 additions & 69 deletions conda-win-64.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ requests-oauthlib==1.3.0
requests-toolbelt==0.8.0
requests==2.24.0
rsa==4.6
scikit-image==0.16.2
scikit-learn==0.22.1
scipy==1.5.4
send2trash==1.5.0
Expand Down
3 changes: 0 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,14 @@ dependencies:
- pandas=1.0.1
- intake
- intake-xarray
- metpy
- netcdf4>=1.4
- numba
- numpy
- partd
- pooch==1.1.1
- pyyaml==5.3
- pytest
- pytest-mpl
- python-kubernetes
- scikit-image
- scikit-learn
- scipy >= 1.5.0
- toolz
Expand Down
2 changes: 0 additions & 2 deletions external/fv3fit/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ partd
Pillow
Pint
pluggy
pooch
py
pyarrow
pyasn1
Expand All @@ -73,7 +72,6 @@ PyWavelets
requests
requests-oauthlib
rsa
scikit-image
scipy >= 1.5.0
six
traitlets
Expand Down
3 changes: 1 addition & 2 deletions external/vcm/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
"xarray",
"toolz",
"scipy",
"scikit-image",
"metpy",
"pooch==1.1.1",
"joblib",
"intake",
"gcsfs",
"zarr",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
919ca49721b5b0a8628f273067a8fb4f
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
919ca49721b5b0a8628f273067a8fb4f
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
919ca49721b5b0a8628f273067a8fb4f
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
919ca49721b5b0a8628f273067a8fb4f
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a9aa230ec5c5bf71f92e77753b13023d
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[('one', 'a9aa230ec5c5bf71f92e77753b13023d'), ('two', '9a75cafb8e8600ef3aa16a87b7befc0c')]
21 changes: 5 additions & 16 deletions external/vcm/tests/test_cubedsphere.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pytest
import xarray as xr
from skimage.measure import block_reduce as skimage_block_reduce
import xgcm

from vcm.cubedsphere.coarsen import (
Expand Down Expand Up @@ -30,6 +29,7 @@
from vcm.cubedsphere.io import all_filenames
from vcm.cubedsphere import create_fv3_grid
from vcm.xarray_utils import assert_identical_including_dtype
import vcm.testing


def remove_duplicate_coords(ds):
Expand Down Expand Up @@ -260,27 +260,16 @@ def input_dataset(input_dataarray):

@pytest.mark.parametrize("reduction_function", [np.mean, np.median])
@pytest.mark.parametrize("use_dask", [False, True])
def test_xarray_block_reduce_dataarray(reduction_function, use_dask, input_dataarray):
block_size = (2, 2, 1)
expected_data = skimage_block_reduce(
input_dataarray.values, block_size=block_size, func=reduction_function
)
expected = xr.DataArray(
expected_data,
dims=input_dataarray.dims,
coords=None,
name="foo",
attrs={"units": "m"},
)

def test_xarray_block_reduce_dataarray(
reduction_function, use_dask, input_dataarray, regtest
):
if use_dask:
input_dataarray = input_dataarray.chunk({"x": 2, "y": 2, "z": -1})

block_sizes = {"x": 2, "y": 2}
result = _xarray_block_reduce_dataarray(
input_dataarray, block_sizes, reduction_function
)
assert_identical_including_dtype(result, expected)
print(vcm.testing.checksum_dataarray(result), file=regtest)
nbren12 marked this conversation as resolved.
Show resolved Hide resolved


def test_xarray_block_reduce_dataarray_bad_chunk_size(input_dataarray):
Expand Down
17 changes: 17 additions & 0 deletions external/vcm/tests/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,26 @@
import pytest

from vcm import testing
import xarray


def test_no_warning():
    # no_warning's __exit__ asserts that zero warnings were recorded, so
    # emitting a warning inside the context must raise AssertionError.
    with pytest.raises(AssertionError):
        with testing.no_warning(None):
            warnings.warn("Warning")


def test_checksum_dataarray(regtest):
    """Regression-test the checksum of a tiny DataArray.

    If this checksum changes then some change probably happened in
    ``joblib.hash``.
    """
    array = xarray.DataArray([1], dims=["x"])
    print(testing.checksum_dataarray(array), file=regtest)


def test_checksum_dataarray_mapping(regtest):
    """Regression-test the per-variable checksums of a tiny Dataset.

    If these checksums change then some change probably happened in
    ``joblib.hash``.
    """
    ds = xarray.Dataset({"one": ("x", [1]), "two": ("x", [2])})
    print(testing.checksum_dataarray_mapping(ds), file=regtest)
202 changes: 202 additions & 0 deletions external/vcm/vcm/cubedsphere/_skimage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""
Vendorized scikit-image code.

From https://github.com/scikit-image/scikit-image/tree/a69d49272145d9839df75f3ee398c0bbde32f448/skimage/util # noqa


## License (Modified BSD)

Copyright (C) 2011, the scikit-image team
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of skimage nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import numpy as np
from numpy.lib.stride_tricks import as_strided


def view_as_blocks(arr_in, block_shape):
    """Re-stride ``arr_in`` into non-overlapping blocks of ``block_shape``.

    Parameters
    ----------
    arr_in : ndarray
        N-d input array.
    block_shape : tuple
        Shape of a single block; every entry must evenly divide the
        corresponding entry of ``arr_in.shape``.

    Returns
    -------
    arr_out : ndarray
        View of ``arr_in`` with ``2 * arr_in.ndim`` axes: the leading
        ``ndim`` axes index blocks, the trailing ``ndim`` axes index
        positions inside one block.

    Examples
    --------
    >>> import numpy as np
    >>> A = np.arange(4 * 4).reshape(4, 4)
    >>> view_as_blocks(A, block_shape=(2, 2))[0, 1]
    array([[2, 3],
           [6, 7]])
    """
    # Validation order (type, positivity, length, divisibility) is part of
    # the observable contract and is preserved exactly.
    if not isinstance(block_shape, tuple):
        raise TypeError("block needs to be a tuple")

    block = np.array(block_shape)
    if np.any(block <= 0):
        raise ValueError("'block_shape' elements must be strictly positive")

    if block.size != arr_in.ndim:
        raise ValueError("'block_shape' must have the same length " "as 'arr_in.shape'")

    in_shape = np.array(arr_in.shape)
    if np.any(in_shape % block):
        raise ValueError("'block_shape' is not compatible with 'arr_in'")

    # Outer axes step over whole blocks; inner axes step within one block.
    # The result is a zero-copy view built by re-striding.
    outer_shape = tuple(in_shape // block)
    outer_strides = tuple(arr_in.strides * block)
    return as_strided(
        arr_in,
        shape=outer_shape + tuple(block),
        strides=outer_strides + arr_in.strides,
    )


def block_reduce(image, block_size, func=np.sum, cval=0, func_kwargs=None):
    """Downsample ``image`` by applying ``func`` to non-overlapping blocks.

    Useful for max and mean pooling, for example.

    Parameters
    ----------
    image : ndarray
        N-dimensional input image.
    block_size : tuple
        Integer down-sampling factor along each axis (forwarded to
        ``view_as_blocks``, which requires a tuple).
    func : callable
        Reduction applied to each local block; must accept an ``axis``
        parameter (e.g. ``numpy.sum``, ``numpy.min``, ``numpy.max``,
        ``numpy.mean``, ``numpy.median``). See also `func_kwargs`.
    cval : float
        Constant padding value used when an axis is not evenly divisible
        by its block size.
    func_kwargs : dict, optional
        Keyword arguments forwarded to ``func``; notably useful for
        passing a dtype, e.g. ``func_kwargs={'dtype': np.float16}``.

    Returns
    -------
    ndarray
        Down-sampled image with the same number of dimensions as ``image``.

    Examples
    --------
    >>> image = np.arange(3*3*4).reshape(3, 3, 4)
    >>> block_reduce(image, block_size=(3, 3, 1), func=np.mean)
    array([[[16., 17., 18., 19.]]])
    """

    if len(block_size) != image.ndim:
        raise ValueError("`block_size` must have the same length " "as `image.shape`.")

    if func_kwargs is None:
        func_kwargs = {}

    for factor in block_size:
        if factor < 1:
            raise ValueError(
                "Down-sampling factors must be >= 1. Use "
                "`skimage.transform.resize` to up-sample an "
                "image."
            )

    # Pad the end of each axis up to a multiple of its block size;
    # `-dim % factor` is 0 when `dim` already divides evenly.
    pad_width = [(0, -dim % factor) for dim, factor in zip(image.shape, block_size)]
    padded = np.pad(image, pad_width=pad_width, mode="constant", constant_values=cval)

    blocked = view_as_blocks(padded, block_size)

    # Collapse the trailing (within-block) axes with the reduction.
    within_block_axes = tuple(range(padded.ndim, blocked.ndim))
    return func(blocked, axis=within_block_axes, **func_kwargs)
2 changes: 1 addition & 1 deletion external/vcm/vcm/cubedsphere/coarsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import numpy as np
import scipy.stats
import xarray as xr
from skimage.measure import block_reduce as skimage_block_reduce
from ._skimage import block_reduce as skimage_block_reduce

from .. import xarray_utils
from vcm.cubedsphere.constants import COORD_X_OUTER, COORD_Y_OUTER
Expand Down
22 changes: 22 additions & 0 deletions external/vcm/vcm/testing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from typing import Hashable, List, Tuple, Mapping
import contextlib
import pytest
import numpy as np
import joblib
import xarray


@contextlib.contextmanager
Expand Down Expand Up @@ -28,3 +32,21 @@ def no_warning(*args):
yield

assert len(record) == 0


def checksum_dataarray(xobj) -> str:
    """Return a deterministic hash string of the array values of ``xobj``.

    Args:
        xobj: any object coercible to a numpy array via ``np.asarray``
            (e.g. an ``xarray.DataArray``).

    Returns:
        digest string produced by ``joblib.hash`` of the underlying data.
        Note: only the values are hashed — coordinates and attributes are
        dropped by ``np.asarray``.
    """
    return joblib.hash(np.asarray(xobj))


def checksum_dataarray_mapping(
    d: Mapping[Hashable, xarray.DataArray]
) -> List[Tuple[Hashable, str]]:
    """Checksum a mapping of dataarrays

    Returns:
        sorted list of (key, hash) combinations. This is sorted to simplify
        regression testing.

    """
    # Sort keys so the output is independent of the mapping's insertion order.
    sorted_keys = sorted(d.keys())
    return [(key, checksum_dataarray(d[key])) for key in sorted_keys]
1 change: 0 additions & 1 deletion workflows/dataflow/examples/diagnostics_to_zarr/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"google-cloud-storage",
"intake",
"numba",
"scikit-image",
"netCDF4",
"xarray==0.15.0",
"partd",
Expand Down
Loading