🧪 Add tests for benchmarking script #297

Merged · 9 commits · May 12, 2022
2 changes: 1 addition & 1 deletion tests/helpers/model.py
@@ -64,7 +64,7 @@ def setup_model_train(
     config.dataset.category = category
     config.dataset.path = dataset_path
     config.project.log_images_to = []
-    config.trainer.gpus = device
+    config.trainer.devices = device

     # If weight file is empty, remove the key from config
     if "weight_file" in config.model.keys() and weight_file == "":
15 changes: 15 additions & 0 deletions tests/pre_merge/tools/__init__.py
@@ -0,0 +1,15 @@
"""Test tools."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
15 changes: 15 additions & 0 deletions tests/pre_merge/tools/benchmarking/__init__.py
@@ -0,0 +1,15 @@
"""Test benchmarking script."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
14 changes: 14 additions & 0 deletions tests/pre_merge/tools/benchmarking/benchmark_params.yaml
@@ -0,0 +1,14 @@
seed: 42
compute_openvino: false
hardware:
  - cpu
  - gpu
writer:
  - tensorboard
grid_search:
  dataset:
    category:
      - bottle
      - cable
  model_name:
    - padim
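
For orientation (not part of this PR's diff): the grid_search section above is what the benchmarking script expands into individual runs. A minimal sketch of that expansion, assuming a plain cartesian product over the listed values; the dotted-key dictionaries and variable names here are illustrative, not the exact structure returned by anomalib's get_run_config helper.

# Illustrative sketch only -- not the actual get_run_config implementation.
from itertools import product

categories = ["bottle", "cable"]   # grid_search.dataset.category from the YAML above
model_names = ["padim"]            # grid_search.model_name from the YAML above

run_configs = [
    {"dataset.category": category, "model_name": model_name}
    for model_name, category in product(model_names, categories)
]
print(run_configs)
# [{'dataset.category': 'bottle', 'model_name': 'padim'},
#  {'dataset.category': 'cable', 'model_name': 'padim'}]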
60 changes: 60 additions & 0 deletions tests/pre_merge/tools/benchmarking/test_benchmarking.py
@@ -0,0 +1,60 @@
"""Test benchmarking script on a subset of models and categories."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

import sys

# Since tools is not part of the anomalib package, accessing benchmarking requires importlib
sys.path.append("tools/benchmarking")
from importlib.util import find_spec

if find_spec("benchmark") is not None:
    from benchmark import distribute
else:
    raise Exception("Unable to import benchmarking script for testing")


from pathlib import Path

from omegaconf import OmegaConf

from tests.helpers.dataset import get_dataset_path


def check_tf_logs(model: str):
Review comment — samet-akcay (Contributor), May 5, 2022:
    What does tf refer to? If tensorboard, should this be check_tb_logs?

Reply — Author (Collaborator):
    Yeap! My bad
"""check if TensorBoard logs are generated."""
for device in ["gpu", "cpu"]:
assert (
len(list(Path("runs", f"{model}_{device}").glob("events.out.tfevents.*"))) > 0
), f"Benchmarking script didn't generate tensorboard logs for {model}"


def check_csv(model: str):
"""Check if csv files are generated"""
for device in ["gpu", "cpu"]:
assert Path(
"runs", f"{model}_{device}.csv"
).exists(), f"Benchmarking script didn't generate csv logs for {model}"


def test_benchmarking():
"""Test if benchmarking script produces the required artifacts."""
config_path = "tests/pre_merge/tools/benchmarking/benchmark_params.yaml"
test_config = OmegaConf.load(config_path)
test_config.grid_search.dataset["path"] = [get_dataset_path()]

distribute(test_config)
check_tf_logs("padim")
check_csv("padim")
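
As a usage note (an assumption, not something stated in this PR): the module should be picked up by the pre-merge test suite, and can also be run on its own via pytest's Python API, provided it is executed from the repository root and the dataset resolved by get_dataset_path() is available locally.

# Minimal sketch for running just this test module; assumes the repository root
# is the working directory so the relative config and tools/ paths resolve.
import pytest

pytest.main(["tests/pre_merge/tools/benchmarking/test_benchmarking.py", "-v"])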
50 changes: 31 additions & 19 deletions tools/benchmarking/benchmark.py
@@ -23,6 +23,7 @@
 import sys
 import time
 import warnings
+from argparse import ArgumentParser
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from pathlib import Path
 from tempfile import TemporaryDirectory
@@ -146,16 +147,15 @@ def get_single_model_metrics(model_config: Union[DictConfig, ListConfig], openvi
     return data


-def compute_on_cpu():
+def compute_on_cpu(sweep_config: Union[DictConfig, ListConfig]):
     """Compute all run configurations over a single CPU."""
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
     for run_config in get_run_config(sweep_config.grid_search):
         model_metrics = sweep(run_config, 0, sweep_config.seed, False)
         write_metrics(model_metrics, sweep_config.writer)


 def compute_on_gpu(
-    run_configs: Union[DictConfig, ListConfig],
+    run_configs: List[DictConfig],
     device: int,
     seed: int,
     writers: List[str],
@@ -180,9 +180,8 @@ def compute_on_gpu(
             )


-def distribute_over_gpus():
+def distribute_over_gpus(sweep_config: Union[DictConfig, ListConfig]):
     """Distribute metric collection over all available GPUs. This is done by splitting the list of configurations."""
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
     with ProcessPoolExecutor(
         max_workers=torch.cuda.device_count(), mp_context=multiprocessing.get_context("spawn")
     ) as executor:
@@ -205,34 +204,33 @@ def distribute_over_gpus():
         try:
             job.result()
         except Exception as exc:
-            raise Exception(f"Error occurred while computing benchmark on device {job}") from exc
+            raise Exception(f"Error occurred while computing benchmark on GPU {job}") from exc


-def distribute():
+def distribute(config: Union[DictConfig, ListConfig]):
     """Run all cpu experiments on a single process. Distribute gpu experiments over all available gpus.

     Args:
-        device_count (int, optional): If device count is 0, uses only cpu else spawn processes according
-            to number of gpus available on the machine. Defaults to 0.
+        config: (Union[DictConfig, ListConfig]): Sweep configuration.
     """
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
-    devices = sweep_config.hardware
-
+    devices = config.hardware
     if not torch.cuda.is_available() and "gpu" in devices:
         pl_logger.warning("Config requested GPU benchmarking but torch could not detect any cuda enabled devices")
     elif {"cpu", "gpu"}.issubset(devices):
         # Create process for gpu and cpu
         with ProcessPoolExecutor(max_workers=2, mp_context=multiprocessing.get_context("spawn")) as executor:
-            jobs = [executor.submit(compute_on_cpu), executor.submit(distribute_over_gpus)]
+            jobs = [executor.submit(compute_on_cpu, config), executor.submit(distribute_over_gpus, config)]
             for job in as_completed(jobs):
                 try:
                     job.result()
                 except Exception as exception:
                     raise Exception(f"Error occurred while computing benchmark on device {job}") from exception
     elif "cpu" in devices:
-        compute_on_cpu()
+        compute_on_cpu(config)
     elif "gpu" in devices:
-        distribute_over_gpus()
-    if "wandb" in sweep_config.writer:
+        distribute_over_gpus(config)
+    if "wandb" in config.writer:
         upload_to_wandb(team="anomalib")
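
To illustrate the refactor above (a sketch, not code from the PR): distribute() no longer reads a hard-coded YAML path, so callers such as the new test pass the sweep configuration in explicitly. The only assumption here is that tools/benchmarking is on sys.path so the benchmark module is importable.

# Sketch of driving the refactored entry point programmatically.
from omegaconf import OmegaConf

from benchmark import distribute  # assumes tools/benchmarking is on sys.path

sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
distribute(sweep_config)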


@@ -264,7 +262,16 @@ def sweep(
     model_config = update_input_size_config(model_config)

     # Set device in config. 0 - cpu, [0], [1].. - gpu id
-    model_config.trainer.gpus = 0 if device == 0 else [device - 1]
+    if device != 0:
+        model_config.trainer.devices = [device - 1]
+        model_config.trainer.accelerator = "gpu"
+    else:
+        model_config.trainer.accelerator = "cpu"
+
+    # Remove legacy flags
+    for legacy_device in ["num_processes", "gpus", "ipus", "tpu_cores"]:
+        if legacy_device in model_config.trainer:
+            model_config.trainer[legacy_device] = None

     if run_config.model_name in ["patchcore", "cflow"]:
         convert_openvino = False  # `torch.cdist` is not supported by onnx version 11
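
For context on the block above (a sketch, not code from this PR): the deprecated trainer.gpus flag is replaced by the devices/accelerator pair. Assuming a PyTorch Lightning 1.6-era API, the resulting trainer section maps onto a Trainer roughly like this; Trainer, accelerator, and devices come from Lightning, not from the PR itself.

# Rough mapping of the updated trainer config onto a PyTorch Lightning Trainer.
from pytorch_lightning import Trainer

cpu_trainer = Trainer(accelerator="cpu")               # device == 0 in sweep()
gpu_trainer = Trainer(accelerator="gpu", devices=[0])  # device == 1 selects the first GPU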
@@ -297,6 +304,11 @@ def sweep(
     # Spawn multiple processes one for cpu and rest for the number of gpus available in the system.
     # The idea is to distribute metrics collection over all the available devices.

-    logger.info("Benchmarking started 🏃‍♂️. This will take a while ⏲ depending on your configuration.")
-    distribute()
-    logger.info("Finished gathering results ⚡")
+    parser = ArgumentParser()
+    parser.add_argument("--config", type=Path, help="Path to sweep configuration")
+    _args = parser.parse_args()
+
+    print("Benchmarking started 🏃‍♂️. This will take a while ⏲ depending on your configuration.")
+    _sweep_config = OmegaConf.load(_args.config)
+    distribute(_sweep_config)
+    print("Finished gathering results ⚡")