Skip to content

Commit

Permalink
Omit chunks with no elements in slice selection with step (#1154)
Browse files Browse the repository at this point in the history
* Omit chunks with no elements in slice selection with step

This stops chunks being read unnecessarily when a slice selection with a
step was used. Previously all chunks spanning the start-end range would
be used regardless of whether they contained any elements.

Fixes #843.

* Test that only the required chunks are accessed during basic selections

This tests that only the expected set of chunks are accessed during
basic slice selection operations for both reads and writes to an array.

* Update release notes.

* Fix typos in release notes.
  • Loading branch information
jrs65 authored Oct 7, 2022
1 parent 2dcffcd commit d6e35a5
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 0 deletions.
9 changes: 9 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ Release notes
# .. warning::
# Pre-release! Use :command:`pip install --pre zarr` to evaluate this release.
.. _release_2.13.3:

2.13.3
------

* Improve performance of slice selections with steps by omitting chunks with no relevant
data.
By :user:`Richard Shaw <jrs65>` :issue:`843`.

.. _release_2.13.2:

2.13.2
Expand Down
5 changes: 5 additions & 0 deletions zarr/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,11 @@ def __iter__(self):
dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step)
dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start),
self.step)

# If there are no elements on the selection within this chunk, then skip
if dim_chunk_nitems == 0:
continue

dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems)

yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
Expand Down
74 changes: 74 additions & 0 deletions zarr/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
PartialChunkIterator,
)

from zarr.tests.util import CountingDict


def test_normalize_integer_selection():

Expand Down Expand Up @@ -1451,3 +1453,75 @@ def test_numpy_int_indexing():
z[:] = a
assert a[42] == z[42]
assert a[numpy.int64(42)] == z[numpy.int64(42)]


@pytest.mark.parametrize(
"shape, chunks, ops",
[
# 1D test cases
((1070,), (50,), [("__getitem__", (slice(200, 400),))]),
((1070,), (50,), [("__getitem__", (slice(200, 400, 100),))]),
((1070,), (50,), [
("__getitem__", (slice(200, 400),)),
("__setitem__", (slice(200, 400, 100),)),
]),
# 2D test cases
((40, 50), (5, 8), [
("__getitem__", (slice(6, 37, 13), (slice(4, 10)))),
("__setitem__", (slice(None), (slice(None)))),
]),
]
)
def test_accessed_chunks(shape, chunks, ops):
# Test that only the required chunks are accessed during basic selection operations
# shape: array shape
# chunks: chunk size
# ops: list of tuples with (optype, tuple of slices)
# optype = "__getitem__" or "__setitem__", tuple length must match number of dims
import itertools

# Use a counting dict as the backing store so we can track the items access
store = CountingDict()
z = zarr.create(shape=shape, chunks=chunks, store=store)

for ii, (optype, slices) in enumerate(ops):

# Resolve the slices into the accessed chunks for each dimension
chunks_per_dim = []
for N, C, sl in zip(shape, chunks, slices):
chunk_ind = np.arange(N, dtype=int)[sl] // C
chunks_per_dim.append(np.unique(chunk_ind))

# Combine and generate the cartesian product to determine the chunks keys that
# will be accessed
chunks_accessed = []
for comb in itertools.product(*chunks_per_dim):
chunks_accessed.append(".".join([str(ci) for ci in comb]))

counts_before = store.counter.copy()

# Perform the operation
if optype == "__getitem__":
z[slices]
else:
z[slices] = ii

# Get the change in counts
delta_counts = store.counter - counts_before

# Check that the access counts for the operation have increased by one for all
# the chunks we expect to be included
for ci in chunks_accessed:
assert delta_counts.pop((optype, ci)) == 1

# If the chunk was partially written to it will also have been read once. We
# don't determine if the chunk was actually partial here, just that the
# counts are consistent that this might have happened
if optype == "__setitem__":
assert (
("__getitem__", ci) not in delta_counts or
delta_counts.pop(("__getitem__", ci)) == 1
)
# Check that no other chunks were accessed
assert len(delta_counts) == 0

0 comments on commit d6e35a5

Please sign in to comment.