🧪 Add tests for benchmarking script (#297)
* Stage changes

* Add test for benchmarking script

* Modify tests for CI

* Move benchmarking tests to nightly

* Rename tf to tb

* Fix merge

Co-authored-by: Ashwin Vaidya <ashwinitinvaidya@gmail.com>
ashwinvaidya17 and Ashwin Vaidya committed May 12, 2022
1 parent 40e0c63 commit 995aa8c
Showing 5 changed files with 135 additions and 19 deletions.
15 changes: 15 additions & 0 deletions tests/nightly/tools/__init__.py
@@ -0,0 +1,15 @@
"""Test tools."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
15 changes: 15 additions & 0 deletions tests/nightly/tools/benchmarking/__init__.py
@@ -0,0 +1,15 @@
"""Test benchmarking script."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
14 changes: 14 additions & 0 deletions tests/nightly/tools/benchmarking/benchmark_params.yaml
@@ -0,0 +1,14 @@
seed: 42
compute_openvino: false
hardware:
  - cpu
  - gpu
writer:
  - tensorboard
grid_search:
  dataset:
    category:
      - bottle
      - cable
  model_name:
    - padim
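Each list under `grid_search` is one axis of a Cartesian product, so this file expands to two runs: padim on `bottle` and padim on `cable`. The real expansion is done by the benchmarking helper `get_run_config`; the hypothetical sketch below only illustrates the idea:

```python
from itertools import product

# Hypothetical illustration of the grid expansion; the actual helper
# (get_run_config) walks the nested OmegaConf structure instead.
grid = {
    "dataset.category": ["bottle", "cable"],
    "model_name": ["padim"],
}

# Cartesian product over every listed value: 2 categories x 1 model = 2 runs.
keys = list(grid)
run_configs = [dict(zip(keys, values)) for values in product(*grid.values())]

for run in run_configs:
    print(run)
# {'dataset.category': 'bottle', 'model_name': 'padim'}
# {'dataset.category': 'cable', 'model_name': 'padim'}
```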
60 changes: 60 additions & 0 deletions tests/nightly/tools/benchmarking/test_benchmarking.py
@@ -0,0 +1,60 @@
"""Test benchmarking script on a subset of models and categories."""

# Copyright (C) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

import sys

# Since tools is not part of the anomalib package, accessing benchmarking requires importlib
sys.path.append("tools/benchmarking")
from importlib.util import find_spec

if find_spec("benchmark") is not None:
from benchmark import distribute
else:
raise Exception("Unable to import benchmarking script for testing")


from pathlib import Path

from omegaconf import OmegaConf

from tests.helpers.dataset import get_dataset_path


def check_tb_logs(model: str):
"""check if TensorBoard logs are generated."""
for device in ["gpu", "cpu"]:
assert (
len(list(Path("runs", f"{model}_{device}").glob("events.out.tfevents.*"))) > 0
), f"Benchmarking script didn't generate tensorboard logs for {model}"


def check_csv(model: str):
"""Check if csv files are generated"""
for device in ["gpu", "cpu"]:
assert Path(
"runs", f"{model}_{device}.csv"
).exists(), f"Benchmarking script didn't generate csv logs for {model}"


def test_benchmarking():
"""Test if benchmarking script produces the required artifacts."""
config_path = "tests/pre_merge/tools/benchmarking/benchmark_params.yaml"
test_config = OmegaConf.load(config_path)
test_config.grid_search.dataset["path"] = [get_dataset_path()]

distribute(test_config)
check_tb_logs("padim")
check_csv("padim")
50 changes: 31 additions & 19 deletions tools/benchmarking/benchmark.py
@@ -23,6 +23,7 @@
 import sys
 import time
 import warnings
+from argparse import ArgumentParser
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from pathlib import Path
 from tempfile import TemporaryDirectory
@@ -146,16 +147,15 @@ def get_single_model_metrics(model_config: Union[DictConfig, ListConfig], openvi
     return data


-def compute_on_cpu():
+def compute_on_cpu(sweep_config: Union[DictConfig, ListConfig]):
     """Compute all run configurations over a single CPU."""
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
     for run_config in get_run_config(sweep_config.grid_search):
         model_metrics = sweep(run_config, 0, sweep_config.seed, False)
         write_metrics(model_metrics, sweep_config.writer)


 def compute_on_gpu(
-    run_configs: Union[DictConfig, ListConfig],
+    run_configs: List[DictConfig],
     device: int,
     seed: int,
     writers: List[str],
@@ -180,9 +180,8 @@ def compute_on_gpu(
     )


-def distribute_over_gpus():
+def distribute_over_gpus(sweep_config: Union[DictConfig, ListConfig]):
     """Distribute metric collection over all available GPUs. This is done by splitting the list of configurations."""
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
     with ProcessPoolExecutor(
         max_workers=torch.cuda.device_count(), mp_context=multiprocessing.get_context("spawn")
     ) as executor:
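Note that both pool helpers pass `mp_context=multiprocessing.get_context("spawn")` rather than relying on the default `fork` start method: a CUDA context cannot be safely re-initialized in a forked child, so spawning fresh interpreters is the reliable choice when each worker pins its own GPU.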
@@ -205,34 +204,33 @@ def distribute_over_gpus():
             try:
                 job.result()
             except Exception as exc:
-                raise Exception(f"Error occurred while computing benchmark on device {job}") from exc
+                raise Exception(f"Error occurred while computing benchmark on GPU {job}") from exc


-def distribute():
+def distribute(config: Union[DictConfig, ListConfig]):
     """Run all cpu experiments on a single process. Distribute gpu experiments over all available gpus.

     Args:
-        device_count (int, optional): If device count is 0, uses only cpu else spawn processes according
-            to number of gpus available on the machine. Defaults to 0.
+        config (Union[DictConfig, ListConfig]): Sweep configuration.
     """
-    sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml")
-    devices = sweep_config.hardware

+    devices = config.hardware
     if not torch.cuda.is_available() and "gpu" in devices:
         pl_logger.warning("Config requested GPU benchmarking but torch could not detect any cuda enabled devices")
     elif {"cpu", "gpu"}.issubset(devices):
         # Create process for gpu and cpu
         with ProcessPoolExecutor(max_workers=2, mp_context=multiprocessing.get_context("spawn")) as executor:
-            jobs = [executor.submit(compute_on_cpu), executor.submit(distribute_over_gpus)]
+            jobs = [executor.submit(compute_on_cpu, config), executor.submit(distribute_over_gpus, config)]
             for job in as_completed(jobs):
                 try:
                     job.result()
                 except Exception as exception:
                     raise Exception(f"Error occurred while computing benchmark on device {job}") from exception
     elif "cpu" in devices:
-        compute_on_cpu()
+        compute_on_cpu(config)
     elif "gpu" in devices:
-        distribute_over_gpus()
-    if "wandb" in sweep_config.writer:
+        distribute_over_gpus(config)
+    if "wandb" in config.writer:
         upload_to_wandb(team="anomalib")


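Threading the sweep configuration through `distribute` as an argument, instead of re-loading `benchmark_params.yaml` inside each helper, is what lets the nightly test above call `distribute(test_config)` with its own trimmed-down parameter file.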
@@ -264,7 +262,16 @@ def sweep(
     model_config = update_input_size_config(model_config)

     # Set device in config. 0 - cpu, [0], [1].. - gpu id
-    model_config.trainer.gpus = 0 if device == 0 else [device - 1]
+    if device != 0:
+        model_config.trainer.devices = [device - 1]
+        model_config.trainer.accelerator = "gpu"
+    else:
+        model_config.trainer.accelerator = "cpu"
+
+    # Remove legacy flags
+    for legacy_device in ["num_processes", "gpus", "ipus", "tpu_cores"]:
+        if legacy_device in model_config.trainer:
+            model_config.trainer[legacy_device] = None

     if run_config.model_name in ["patchcore", "cflow"]:
         convert_openvino = False  # `torch.cdist` is not supported by onnx version 11
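The replaced `trainer.gpus` flag tracks the PyTorch Lightning API change this hunk adapts to: device selection is now an `accelerator` name plus a `devices` list. A minimal illustration, assuming Lightning 1.6+ and, for the GPU case, a CUDA-capable machine:

```python
from pytorch_lightning import Trainer

# Legacy style: a single `gpus` flag selected the hardware.
# trainer = Trainer(gpus=[0])

# Current style: name the accelerator and list the devices explicitly.
trainer_gpu = Trainer(accelerator="gpu", devices=[0])  # train on GPU 0
trainer_cpu = Trainer(accelerator="cpu")               # train on the CPU
```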
@@ -297,6 +304,11 @@ def sweep(
     # Spawn multiple processes one for cpu and rest for the number of gpus available in the system.
     # The idea is to distribute metrics collection over all the available devices.

-    logger.info("Benchmarking started 🏃‍♂️. This will take a while ⏲ depending on your configuration.")
-    distribute()
-    logger.info("Finished gathering results ⚡")
+    parser = ArgumentParser()
+    parser.add_argument("--config", type=Path, help="Path to sweep configuration")
+    _args = parser.parse_args()
+
+    print("Benchmarking started 🏃‍♂️. This will take a while ⏲ depending on your configuration.")
+    _sweep_config = OmegaConf.load(_args.config)
+    distribute(_sweep_config)
+    print("Finished gathering results ⚡")

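With this change the script no longer hard-codes its parameter file; it has to be invoked with an explicit config, e.g. `python tools/benchmarking/benchmark.py --config tools/benchmarking/benchmark_params.yaml`.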