no profile via embedded dask config
gjoseph92 committed May 19, 2021
1 parent a2b5621 commit 230e681
Showing 4 changed files with 47 additions and 11 deletions.
7 changes: 7 additions & 0 deletions dask.yaml
@@ -0,0 +1,7 @@
# disable dask's profiling
distributed:
worker:
profile:
interval: 2h
cycle: 10h
low-level: false
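
For reference, the same override can be expressed in-process with `dask.config.set`. This is a sketch, not part of the commit, and it only affects the process that runs it, which is why the commit ships a YAML file to the cluster instead:

import dask

dask.config.set(
    {
        "distributed.worker.profile.interval": "2h",    # sample the call stack very rarely
        "distributed.worker.profile.cycle": "10h",      # roll profile data over even less often
        "distributed.worker.profile.low-level": False,  # keep low-level (libunwind) profiling off
    }
)

Raising the interval and cycle this far effectively turns off distributed's statistical profiler without any code change.
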
24 changes: 16 additions & 8 deletions dask_profiling_coiled/run_profile.py
@@ -85,22 +85,30 @@ def main():
print(f"Waiting for {n_workers} workers...")
client.wait_for_workers(n_workers)

def disable_gc():
# https://github.com/benfred/py-spy/issues/389#issuecomment-833903190
import gc
# def disable_gc():
# # https://github.com/benfred/py-spy/issues/389#issuecomment-833903190
# import gc

gc.disable()
gc.set_threshold(0)
# gc.disable()
# gc.set_threshold(0)

print("Disabling GC on scheduler")
client.run_on_scheduler(disable_gc)
# print("Disabling GC on scheduler")
# client.run_on_scheduler(disable_gc)

# def enable_gc_debug():
# import gc

# gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE | gc.DEBUG_UNCOLLECTABLE)

# print("Enabling GC debug logging on scheduler")
# client.run_on_scheduler(enable_gc_debug)

print("Here we go!")

# This is key---otherwise we're uploading ~300MiB of graph to the scheduler
dask.config.set({"optimization.fuse.active": False})

# test_name = "cython-nogc-200workers"
test_name = "cython-gc-maybe-noprofiling-shuffle"
with (
distributed.performance_report(f"results/{test_name}.html"),
pyspy_on_scheduler(
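
For context, a runnable sketch of the GC tweaks that this commit comments out, assuming `client` is an already-connected `distributed.Client` (a local cluster stands in here for the Coiled cluster the script actually uses):

import distributed

def disable_gc():
    # Workaround discussed in https://github.com/benfred/py-spy/issues/389#issuecomment-833903190
    import gc

    gc.disable()
    gc.set_threshold(0)

def enable_gc_debug():
    import gc

    gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE | gc.DEBUG_UNCOLLECTABLE)

client = distributed.Client()               # assumption: stand-in for the Coiled cluster
client.run_on_scheduler(disable_gc)         # runs the function inside the scheduler process
# client.run_on_scheduler(enable_gc_debug)  # alternative: log GC activity instead of disabling it

`client.run_on_scheduler` ships the function to the scheduler and executes it there, so the GC settings apply to the scheduler process rather than the client.
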
5 changes: 3 additions & 2 deletions environment.yml
@@ -28,5 +28,6 @@ dependencies:
# - git+https://github.com/gjoseph92/scheduler-profilers.git # TODO this conflicts with --install-option for distributed, using postBuild instead
# - git+https://github.com/gjoseph92/dask-noop.git
variables:
DASK_DISTRIBUTED__WORKER__PROFILE__INTERVAL: 2h
DASK_DISTRIBUTED__WORKER__PROFILE__CYCLE: 10h
DASK_CONFIG: dask.yaml
# DASK_DISTRIBUTED__WORKER__PROFILE__INTERVAL: 2h
# DASK_DISTRIBUTED__WORKER__PROFILE__CYCLE: 10h
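
The two commented-out variables were the previous way of getting the same settings onto the cluster: dask translates any `DASK_*` environment variable into a nested config key. A minimal sketch of that translation, not part of the commit:

import os
import dask

os.environ["DASK_DISTRIBUTED__WORKER__PROFILE__INTERVAL"] = "2h"
dask.config.refresh()  # re-collects environment variables and config files
print(dask.config.get("distributed.worker.profile.interval"))  # -> "2h"

Pointing `DASK_CONFIG` at a YAML file (here `dask.yaml`) achieves the same result while keeping all of the settings in one place.
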
22 changes: 21 additions & 1 deletion make-coiled-env.sh
@@ -1,10 +1,30 @@
#!/bin/bash

# Install py-spy separately so it doesn't conflict with Cythonized distributed
# Install py-spy separately so it doesn't conflict with Cythonized distributed.
# Also add dask config.

# HACK: Coiled offers no easy way to add auxiliary data files---or a dask config---in software environments,
# so we generate a post-build shell script that has the contents of `dask.yaml` within itself, and writes
# those contents out when executed.
OUT_CONFIG_PATH="~/.config/dask/dask.yaml"
YAML_CONTENTS=$(<dask.yaml)
cat > postbuild.sh <<EOF
#!/bin/bash
python3 -m pip install git+https://github.com/gjoseph92/scheduler-profilers.git@8d59e7f8b2ab59e22f0937557fefe388eac6ea61
OUT_CONFIG_PATH=$OUT_CONFIG_PATH
# ^ NOTE: no quotes, so ~ expands (https://stackoverflow.com/a/32277036)
mkdir -p \$(dirname \$OUT_CONFIG_PATH)
cat > \$OUT_CONFIG_PATH <<INNER_EOF
$YAML_CONTENTS
INNER_EOF
echo "export DASK_CONFIG=\$OUT_CONFIG_PATH" >> ~/.bashrc
echo "Wrote dask config to \$OUT_CONFIG_PATH:"
cat \$OUT_CONFIG_PATH
EOF
coiled env create -n profiling --conda environment.yml --post-build postbuild.sh
rm postbuild.sh
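
Once a cluster is built from this environment, one way (not part of the commit) to confirm that the post-build script wrote the config where dask will find it is to read a setting back from the worker and scheduler processes:

import dask
import distributed

def read_profile_cycle():
    # Runs inside the remote process, so it reflects that process's own config
    return dask.config.get("distributed.worker.profile.cycle", default=None)

client = distributed.Client()                       # assumption: stand-in for the Coiled cluster
print(client.run(read_profile_cycle))               # one entry per worker, expected "10h"
print(client.run_on_scheduler(read_profile_cycle))  # and the scheduler's view
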
