Skip to content

Commit

Permalink
gc debug no profiling
Browse files Browse the repository at this point in the history
Killed this run after ~5 min: the scheduler could barely open the dashboard. Clearly, turning on GC debug logging affected something.
  • Loading branch information
gjoseph92 committed May 19, 2021
1 parent ac61e5f commit c0ea2aa
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 9 deletions.
12 changes: 6 additions & 6 deletions dask_profiling_coiled/run_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,20 +91,20 @@ def main():
# print("Disabling GC on scheduler")
# client.run_on_scheduler(disable_gc)

# def enable_gc_debug():
# import gc
def enable_gc_debug():
import gc

# gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE | gc.DEBUG_UNCOLLECTABLE)
gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE | gc.DEBUG_UNCOLLECTABLE)

# print("Enabling GC debug logging on scheduler")
# client.run_on_scheduler(enable_gc_debug)
print("Enabling GC debug logging on scheduler")
client.run_on_scheduler(enable_gc_debug)

print("Here we go!")

# This is key---otherwise we're uploading ~300MiB of graph to the scheduler
dask.config.set({"optimization.fuse.active": False})

test_name = "cython-shuffle-gc-noprofiling-env"
test_name = "cython-shuffle-gc-debug-noprofiling"
with (
distributed.performance_report(f"results/{test_name}.html"),
pyspy_on_scheduler(
Expand Down
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,4 @@ dependencies:
# - git+https://github.com/gjoseph92/scheduler-profilers.git # TODO this conflicts with --install-option for distributed, using postBuild instead
# - git+https://github.com/gjoseph92/dask-noop.git
variables:
DASK_DISTRIBUTED__WORKER__PROFILE__INTERVAL: 2h
DASK_DISTRIBUTED__WORKER__PROFILE__CYCLE: 10h
DASK_CONFIG: dask.yaml
22 changes: 21 additions & 1 deletion make-coiled-env.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,30 @@
#!/bin/bash

# Create the Coiled software environment `profiling` from environment.yml.
#
# Install py-spy (via scheduler-profilers) separately, in a post-build step, so it doesn't conflict
# with Cythonized distributed. Also add the dask config.

# HACK: Coiled offers no easy way to add auxiliary data files---or a dask config---in software environments,
# so we generate a post-build shell script that has the contents of `dask.yaml` within itself, and writes
# those contents out when executed.

# Quoted on purpose: `~` must stay literal in THIS script so that it is only expanded later,
# when postbuild.sh runs inside the software environment.
OUT_CONFIG_PATH="~/.config/dask/dask.yaml"
YAML_CONTENTS=$(<dask.yaml)

# The outer heredoc delimiter is unquoted: $OUT_CONFIG_PATH and $YAML_CONTENTS are substituted now,
# while backslash-escaped references (\$...) are left for postbuild.sh to evaluate at run time.
cat > postbuild.sh <<EOF
#!/bin/bash
python3 -m pip install git+https://github.com/gjoseph92/scheduler-profilers.git@8d59e7f8b2ab59e22f0937557fefe388eac6ea61
OUT_CONFIG_PATH=$OUT_CONFIG_PATH
# ^ NOTE: no quotes, so ~ expands (https://stackoverflow.com/a/32277036)
# From here on the path is already expanded, so quote it to survive whitespace in the home directory.
mkdir -p "\$(dirname "\$OUT_CONFIG_PATH")"
# Quoted delimiter: the YAML is written verbatim, with no shell expansion of its contents.
cat > "\$OUT_CONFIG_PATH" <<'INNER_EOF'
$YAML_CONTENTS
INNER_EOF
echo "export DASK_CONFIG=\$OUT_CONFIG_PATH" >> ~/.bashrc
echo "Wrote dask config to \$OUT_CONFIG_PATH:"
cat "\$OUT_CONFIG_PATH"
EOF

coiled env create -n profiling --conda environment.yml --post-build postbuild.sh

# The generated script is only needed for the `coiled env create` call above.
rm postbuild.sh
88 changes: 88 additions & 0 deletions results/cython-shuffle-gc-debug-noprofiling.html

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions results/cython-shuffle-gc-debug-noprofiling.json

Large diffs are not rendered by default.

0 comments on commit c0ea2aa

Please sign in to comment.