diff --git a/dask.yaml b/dask.yaml new file mode 100644 index 0000000..b8f12a4 --- /dev/null +++ b/dask.yaml @@ -0,0 +1,7 @@ +# disable dask's profiling +distributed: + worker: + profile: + interval: 2h + cycle: 10h + low-level: false diff --git a/dask_profiling_coiled/run_profile.py b/dask_profiling_coiled/run_profile.py index ede7a35..c41eed5 100644 --- a/dask_profiling_coiled/run_profile.py +++ b/dask_profiling_coiled/run_profile.py @@ -85,22 +85,30 @@ def main(): print(f"Waiting for {n_workers} workers...") client.wait_for_workers(n_workers) - def disable_gc(): - # https://github.com/benfred/py-spy/issues/389#issuecomment-833903190 - import gc + # def disable_gc(): + # # https://github.com/benfred/py-spy/issues/389#issuecomment-833903190 + # import gc - gc.disable() - gc.set_threshold(0) + # gc.disable() + # gc.set_threshold(0) - print("Disabling GC on scheduler") - client.run_on_scheduler(disable_gc) + # print("Disabling GC on scheduler") + # client.run_on_scheduler(disable_gc) + + # def enable_gc_debug(): + # import gc + + # gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE | gc.DEBUG_UNCOLLECTABLE) + + # print("Enabling GC debug logging on scheduler") + # client.run_on_scheduler(enable_gc_debug) print("Here we go!") # This is key---otherwise we're uploading ~300MiB of graph to the scheduler dask.config.set({"optimization.fuse.active": False}) - # test_name = "cython-nogc-200workers" + test_name = "cython-gc-maybe-noprofiling-shuffle" with ( distributed.performance_report(f"results/{test_name}.html"), pyspy_on_scheduler( diff --git a/environment.yml b/environment.yml index 4670fe4..c44c33d 100644 --- a/environment.yml +++ b/environment.yml @@ -28,5 +28,6 @@ dependencies: # - git+https://github.com/gjoseph92/scheduler-profilers.git # TODO this conflicts with --install-option for distributed, using postBuild instead # - git+https://github.com/gjoseph92/dask-noop.git variables: - DASK_DISTRIBUTED__WORKER__PROFILE__INTERVAL: 2h - DASK_DISTRIBUTED__WORKER__PROFILE__CYCLE: 10h \ No newline at end of file + DASK_CONFIG: dask.yaml + # DASK_DISTRIBUTED__WORKER__PROFILE__INTERVAL: 2h + # DASK_DISTRIBUTED__WORKER__PROFILE__CYCLE: 10h diff --git a/make-coiled-env.sh b/make-coiled-env.sh index f3404e9..0f87d5f 100755 --- a/make-coiled-env.sh +++ b/make-coiled-env.sh @@ -1,10 +1,30 @@ #!/bin/bash -# Install py-spy separately so it doesn't conflict with Cythonized distributed +# Install py-spy separately so it doesn't conflict with Cythonized distributed. +# Also add dask config. + +# HACK: Coiled offers no easy way to add auxiliary data files---or a dask config---in software environments, +# so we generate a post-build shell script that has the contents of `dask.yaml` within itself, and writes +# those contents out when executed. +OUT_CONFIG_PATH="~/.config/dask/dask.yaml" +YAML_CONTENTS=$( postbuild.sh < \$OUT_CONFIG_PATH <> ~/.bashrc + +echo "Wrote dask config to \$OUT_CONFIG_PATH:" +cat \$OUT_CONFIG_PATH EOF coiled env create -n profiling --conda environment.yml --post-build postbuild.sh rm postbuild.sh