-
-
Notifications
You must be signed in to change notification settings - Fork 718
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add failing test for forgotten tasks
- Loading branch information
Showing
1 changed file
with
27 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -395,3 +395,30 @@ async def test_restrictions(c, s, a, b): | |
|
||
await y | ||
assert all(stringify(key) in a.data for key in y.__dask_keys__()) | ||
|
||
|
||
@pytest.mark.xfail(reason="Don't clean up forgotten shuffles") | ||
@gen_cluster(client=True) | ||
async def test_delete_some_results(c, s, a, b): | ||
df = dask.datasets.timeseries( | ||
start="2000-01-01", | ||
end="2000-01-10", | ||
dtypes={"x": float, "y": float}, | ||
freq="10 s", | ||
) | ||
x = dd.shuffle.shuffle(df, "x", shuffle="p2p").persist() | ||
while not s.tasks or not any(ts.state == "memory" for ts in s.tasks.values()): | ||
await asyncio.sleep(0.01) | ||
|
||
n = len(s.tasks) | ||
|
||
x = x.partitions[: x.npartitions // 2].persist() | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
mrocklin
via email
Author
Member
|
||
|
||
while len(s.tasks) == n: | ||
await asyncio.sleep(0.1) | ||
|
||
await x | ||
|
||
clean_worker(a) | ||
clean_worker(b) | ||
clean_scheduler(s) |
I would have expected the order of these to be flipped, as in
That may trigger a more serious issue than just data not being cleaned up. I think it would also cause half of the output data to be missing (basically, it would act as if the second, full shuffle had never happened)?