🧪 Add tests for benchmarking script (#297)
* Stage changes

* Add test for benchmarking script

* Modify tests for CI

* Move benchmarking tests to nightly

* Rename tf to tb

* Fix merge

Co-authored-by: Ashwin Vaidya <ashwinitinvaidya@gmail.com>
ashwinvaidya17 and Ashwin Vaidya committed May 12, 2022
1 parent 40e0c63 commit 995aa8c
Showing 5 changed files with 135 additions and 19 deletions.
15 changes: 15 additions & 0 deletions tests/nightly/tools/__init__.py
@@ -0,0 +1,15 @@
"""Test tools."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
15 changes: 15 additions & 0 deletions tests/nightly/tools/benchmarking/__init__.py
@@ -0,0 +1,15 @@
"""Test benchmarking script."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
14 changes: 14 additions & 0 deletions tests/nightly/tools/benchmarking/benchmark_params.yaml
@@ -0,0 +1,14 @@
seed: 42
compute_openvino: false
hardware:
  - cpu
  - gpu
writer:
  - tensorboard
grid_search:
  dataset:
    category:
      - bottle
      - cable
  model_name:
    - padim
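Each list under `grid_search` is one axis of a Cartesian product, so this file expands to two runs: padim on `bottle` and padim on `cable`. The real expansion is done by the benchmarking helper `get_run_config`; the hypothetical sketch below only illustrates the idea:

```python
from itertools import product

# Hypothetical illustration of the grid expansion; the actual helper
# (get_run_config) walks the nested OmegaConf structure instead.
grid = {
    "dataset.category": ["bottle", "cable"],
    "model_name": ["padim"],
}

# Cartesian product over every listed value: 2 categories x 1 model = 2 runs.
keys = list(grid)
run_configs = [dict(zip(keys, values)) for values in product(*grid.values())]

for run in run_configs:
    print(run)
# {'dataset.category': 'bottle', 'model_name': 'padim'}
# {'dataset.category': 'cable', 'model_name': 'padim'}
```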
60 changes: 60 additions & 0 deletions tests/nightly/tools/benchmarking/test_benchmarking.py
@@ -0,0 +1,60 @@
"""Test benchmarking script on a subset of models and categories."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

import sys

# Since tools is not part of the anomalib package, accessing benchmarking requires importlib
sys.path.append("tools/benchmarking")
from importlib.util import find_spec

if find_spec("benchmark") is not None:
from benchmark import distribute
else:
raise Exception("Unable to import benchmarking script for testing")


from pathlib import Path

from omegaconf import OmegaConf

from tests.helpers.dataset import get_dataset_path


def check_tb_logs(model: str):
"""check if TensorBoard logs are generated."""
for device in ["gpu", "cpu"]:
assert (
len(list(Path("runs", f"{model}_{device}").glob("events.out.tfevents.*"))) > 0
), f"Benchmarking script didn't generate tensorboard logs for {model}"


def check_csv(model: str):
"""Check if csv files are generated"""
for device in ["gpu", "cpu"]:
assert Path(
"runs", f"{model}_{device}.csv"
).exists(), f"Benchmarking script didn't generate csv logs for {model}"


def test_benchmarking():
"""Test if benchmarking script produces the required artifacts."""
config_path = "tests/pre_merge/tools/benchmarking/benchmark_params.yaml"
test_config = OmegaConf.load(config_path)
test_config.grid_search.dataset["path"] = [get_dataset_path()]

distribute(test_config)
check_tb_logs("padim")
check_csv("padim")
50 changes: 31 additions & 19 deletions tools/benchmarking/benchmark.py
@@ -23,6 +23,7 @@
 import sys
 import time
 import warnings
+from argparse import ArgumentParser
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from pathlib import Path
 from tempfile import TemporaryDirectory
@@ -146,16 +147,15 @@ def get_single_model_metrics(model_config: Union[DictConfig, ListConfig], openvi
     return data


-def compute_on_cpu():
+def compute_on_cpu(sweep_config: Union[DictConfig, ListConfig]):
     """Compute all run configurations over a single CPU."""
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
     for run_config in get_run_config(sweep_config.grid_search):
         model_metrics = sweep(run_config, 0, sweep_config.seed, False)
         write_metrics(model_metrics, sweep_config.writer)


 def compute_on_gpu(
-    run_configs: Union[DictConfig, ListConfig],
+    run_configs: List[DictConfig],
     device: int,
     seed: int,
     writers: List[str],
@@ -180,9 +180,8 @@ def compute_on_gpu(
     )


-def distribute_over_gpus():
+def distribute_over_gpus(sweep_config: Union[DictConfig, ListConfig]):
     """Distribute metric collection over all available GPUs. This is done by splitting the list of configurations."""
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
     with ProcessPoolExecutor(
         max_workers=torch.cuda.device_count(), mp_context=multiprocessing.get_context("spawn")
     ) as executor:
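Note that both pool helpers pass `mp_context=multiprocessing.get_context("spawn")` rather than relying on the default `fork` start method: a CUDA context cannot be safely re-initialized in a forked child, so spawning fresh interpreters is the reliable choice when each worker pins its own GPU.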
@@ -205,34 +204,33 @@ def distribute_over_gpus():
             try:
                 job.result()
             except Exception as exc:
-                raise Exception(f"Error occurred while computing benchmark on device {job}") from exc
+                raise Exception(f"Error occurred while computing benchmark on GPU {job}") from exc


-def distribute():
+def distribute(config: Union[DictConfig, ListConfig]):
     """Run all cpu experiments on a single process. Distribute gpu experiments over all available gpus.

     Args:
-        device_count (int, optional): If device count is 0, uses only cpu else spawn processes according
-            to number of gpus available on the machine. Defaults to 0.
+        config (Union[DictConfig, ListConfig]): Sweep configuration.
     """
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
-    devices = sweep_config.hardware

+    devices = config.hardware
     if not torch.cuda.is_available() and "gpu" in devices:
         pl_logger.warning("Config requested GPU benchmarking but torch could not detect any cuda enabled devices")
     elif {"cpu", "gpu"}.issubset(devices):
         # Create process for gpu and cpu
         with ProcessPoolExecutor(max_workers=2, mp_context=multiprocessing.get_context("spawn")) as executor:
-            jobs = [executor.submit(compute_on_cpu), executor.submit(distribute_over_gpus)]
+            jobs = [executor.submit(compute_on_cpu, config), executor.submit(distribute_over_gpus, config)]
             for job in as_completed(jobs):
                 try:
                     job.result()
                 except Exception as exception:
                     raise Exception(f"Error occurred while computing benchmark on device {job}") from exception
     elif "cpu" in devices:
-        compute_on_cpu()
+        compute_on_cpu(config)
     elif "gpu" in devices:
-        distribute_over_gpus()
-    if "wandb" in sweep_config.writer:
+        distribute_over_gpus(config)
+    if "wandb" in config.writer:
         upload_to_wandb(team="anomalib")


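Threading the sweep configuration through `distribute` as an argument, instead of re-loading `benchmark_params.yaml` inside each helper, is what lets the nightly test above call `distribute(test_config)` with its own trimmed-down parameter file.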
@@ -264,7 +262,16 @@ def sweep(
     model_config = update_input_size_config(model_config)

     # Set device in config. 0 - cpu, [0], [1].. - gpu id
-    model_config.trainer.gpus = 0 if device == 0 else [device - 1]
+    if device != 0:
+        model_config.trainer.devices = [device - 1]
+        model_config.trainer.accelerator = "gpu"
+    else:
+        model_config.trainer.accelerator = "cpu"
+
+    # Remove legacy flags
+    for legacy_device in ["num_processes", "gpus", "ipus", "tpu_cores"]:
+        if legacy_device in model_config.trainer:
+            model_config.trainer[legacy_device] = None

     if run_config.model_name in ["patchcore", "cflow"]:
         convert_openvino = False  # `torch.cdist` is not supported by onnx version 11
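The replaced `trainer.gpus` flag tracks the PyTorch Lightning API change this hunk adapts to: device selection is now an `accelerator` name plus a `devices` list. A minimal illustration, assuming Lightning 1.6+ and, for the GPU case, a CUDA-capable machine:

```python
from pytorch_lightning import Trainer

# Legacy style: a single `gpus` flag selected the hardware.
# trainer = Trainer(gpus=[0])

# Current style: name the accelerator and list the devices explicitly.
trainer_gpu = Trainer(accelerator="gpu", devices=[0])  # train on GPU 0
trainer_cpu = Trainer(accelerator="cpu")               # train on the CPU
```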
@@ -297,6 +304,11 @@ def sweep(
     # Spawn multiple processes one for cpu and rest for the number of gpus available in the system.
     # The idea is to distribute metrics collection over all the available devices.

-    logger.info("Benchmarking started 🏃‍♂️. This will take a while ⏲ depending on your configuration.")
-    distribute()
-    logger.info("Finished gathering results ⚡")
+    parser = ArgumentParser()
+    parser.add_argument("--config", type=Path, help="Path to sweep configuration")
+    _args = parser.parse_args()
+
+    print("Benchmarking started 🏃‍♂️. This will take a while ⏲ depending on your configuration.")
+    _sweep_config = OmegaConf.load(_args.config)
+    distribute(_sweep_config)
+    print("Finished gathering results ⚡")

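With this change the script no longer hard-codes its parameter file; it has to be invoked with an explicit config, e.g. `python tools/benchmarking/benchmark.py --config tools/benchmarking/benchmark_params.yaml`.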