🚀 Anomalib Pipelines (#2060)

* 🚀 Anomalib Pipelines (#2005) * Add initial design Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Refactor + add to CLI Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Support grid search on class path Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * redirect outputs Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * design v2 Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * remove commented code Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * add dummy experiment Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * add config Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Refactor Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Add tests Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Apply suggestions from code review Co-authored-by: Samet Akcay <samet.akcay@intel.com> * address pr comments Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Apply suggestions from code review Co-authored-by: Samet Akcay <samet.akcay@intel.com> * refactor Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Simplify argparse Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * modify logger redirect Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * update docstrings Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> --------- Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> Co-authored-by: Samet Akcay <samet.akcay@intel.com> * 🐞 Fix Rich Progress with Patchcore Training (#2062) Add safe track Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * [Pipelines] 🔨 Intra-stage result passing (#2061) * Add initial design Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Refactor + add to CLI Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Support grid search on class path Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * redirect outputs Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * design v2 Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * remove commented code Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * add dummy experiment Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * add config Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Refactor Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Add tests Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Apply suggestions from code review Co-authored-by: Samet Akcay <samet.akcay@intel.com> * address pr comments Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Apply suggestions from code review Co-authored-by: Samet Akcay <samet.akcay@intel.com> * refactor Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Simplify argparse Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * modify logger redirect Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * update docstrings Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> * Add proposal Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> --------- Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> Co-authored-by: Samet Akcay <samet.akcay@intel.com> * Update src/anomalib/pipelines/benchmark/job.py --------- Signed-off-by: Ashwin Vaidya <ashwinnitinvaidya@gmail.com> Co-authored-by: Samet Akcay <samet.akcay@intel.com>
openvinotoolkit · May 24, 2024 · a4c5a2b · a4c5a2b
1 parent 5ca1612
commit a4c5a2b
Show file tree

Hide file tree

Showing 36 changed files with 1,068 additions and 20 deletions.
diff --git a/src/anomalib/cli/cli.py b/src/anomalib/cli/cli.py
@@ -15,6 +15,7 @@
 from rich import traceback
 
 from anomalib import TaskType, __version__
+from anomalib.cli.pipelines import PIPELINE_REGISTRY, pipeline_subcommands, run_pipeline
 from anomalib.cli.utils.help_formatter import CustomHelpFormatter, get_short_docstring
 from anomalib.cli.utils.openvino import add_openvino_export_arguments
 from anomalib.loggers import configure_logger
@@ -131,6 +132,13 @@ def add_subcommands(self, **kwargs) -> None:
             # add arguments to subcommand
             getattr(self, f"add_{subcommand}_arguments")(sub_parser)
 
+        # Add pipeline subcommands
+        if PIPELINE_REGISTRY is not None:
+            for subcommand, value in pipeline_subcommands().items():
+                sub_parser = PIPELINE_REGISTRY[subcommand].get_parser()
+                self.subcommand_parsers[subcommand] = sub_parser
+                parser_subcommands.add_subcommand(subcommand, sub_parser, help=value["description"])
+
     def add_arguments_to_parser(self, parser: ArgumentParser) -> None:
         """Extend trainer's arguments to add engine arguments.
 
@@ -355,6 +363,8 @@ def _run_subcommand(self) -> None:
             fn = getattr(self.engine, self.subcommand)
             fn_kwargs = self._prepare_subcommand_kwargs(self.subcommand)
             fn(**fn_kwargs)
+        elif PIPELINE_REGISTRY is not None and self.subcommand in pipeline_subcommands():
+            run_pipeline(self.config)
         else:
             self.config_init = self.parser.instantiate_classes(self.config)
             getattr(self, f"{self.subcommand}")()

diff --git a/src/anomalib/cli/pipelines.py b/src/anomalib/cli/pipelines.py
@@ -0,0 +1,41 @@
+"""Subcommand for pipelines."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import logging
+
+from jsonargparse import Namespace
+
+from anomalib.cli.utils.help_formatter import get_short_docstring
+from anomalib.utils.exceptions import try_import
+
+logger = logging.getLogger(__name__)
+
+if try_import("anomalib.pipelines"):
+    from anomalib.pipelines import Benchmark
+    from anomalib.pipelines.components.base import Pipeline
+
+    PIPELINE_REGISTRY: dict[str, type[Pipeline]] | None = {"benchmark": Benchmark}
+else:
+    PIPELINE_REGISTRY = None
+
+
+def pipeline_subcommands() -> dict[str, dict[str, str]]:
+    """Return subcommands for pipelines."""
+    if PIPELINE_REGISTRY is not None:
+        return {name: {"description": get_short_docstring(pipeline)} for name, pipeline in PIPELINE_REGISTRY.items()}
+    return {}
+
+
+def run_pipeline(args: Namespace) -> None:
+    """Run pipeline."""
+    logger.warning("This feature is experimental. It may change or be removed in the future.")
+    if PIPELINE_REGISTRY is not None:
+        subcommand = args.subcommand
+        config = args[subcommand]
+        PIPELINE_REGISTRY[subcommand]().run(config)
+    else:
+        msg = "Pipeline is not available"
+        raise ValueError(msg)
diff --git a/src/anomalib/cli/utils/help_formatter.py b/src/anomalib/cli/utils/help_formatter.py
@@ -6,7 +6,6 @@
 import argparse
 import re
 import sys
-from typing import TypeVar
 
 import docstring_parser
 from jsonargparse import DefaultHelpFormatter
@@ -38,11 +37,11 @@
     print("To use other subcommand using `anomalib install`")
 
 
-def get_short_docstring(component: TypeVar) -> str:
+def get_short_docstring(component: type) -> str:
     """Get the short description from the docstring.
 
     Args:
-        component (TypeVar): The component to get the docstring from
+        component (type): The component to get the docstring from
 
     Returns:
         str: The short description

diff --git a/src/anomalib/data/__init__.py b/src/anomalib/data/__init__.py
@@ -3,7 +3,6 @@
 # Copyright (C) 2022-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-
 import importlib
 import logging
 from enum import Enum
@@ -29,20 +28,35 @@
 )
 
 
-def get_datamodule(config: DictConfig | ListConfig) -> AnomalibDataModule:
+class UnknownDatamoduleError(ModuleNotFoundError):
+    ...
+
+
+def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule:
     """Get Anomaly Datamodule.
 
     Args:
-        config (DictConfig | ListConfig): Configuration of the anomaly model.
+        config (DictConfig | ListConfig | dict): Configuration of the anomaly model.
 
     Returns:
         PyTorch Lightning DataModule
     """
     logger.info("Loading the datamodule")
 
-    module = importlib.import_module(".".join(config.data.class_path.split(".")[:-1]))
-    dataclass = getattr(module, config.data.class_path.split(".")[-1])
-    init_args = {**config.data.get("init_args", {})}  # get dict
+    if isinstance(config, dict):
+        config = DictConfig(config)
+
+    try:
+        _config = config.data if "data" in config else config
+        if len(_config.class_path.split(".")) > 1:
+            module = importlib.import_module(".".join(_config.class_path.split(".")[:-1]))
+        else:
+            module = importlib.import_module("anomalib.data")
+    except ModuleNotFoundError as exception:
+        logger.exception(f"ModuleNotFoundError: {_config.class_path}")
+        raise UnknownDatamoduleError from exception
+    dataclass = getattr(module, _config.class_path.split(".")[-1])
+    init_args = {**_config.get("init_args", {})}  # get dict
     if "image_size" in init_args:
         init_args["image_size"] = to_tuple(init_args["image_size"])
 

diff --git a/src/anomalib/engine/engine.py b/src/anomalib/engine/engine.py
@@ -9,7 +9,7 @@
 from typing import Any
 
 import torch
-from lightning.pytorch.callbacks import Callback
+from lightning.pytorch.callbacks import Callback, RichModelSummary, RichProgressBar
 from lightning.pytorch.loggers import Logger
 from lightning.pytorch.trainer import Trainer
 from lightning.pytorch.utilities.types import _EVALUATE_OUTPUT, _PREDICT_OUTPUT, EVAL_DATALOADERS, TRAIN_DATALOADERS
@@ -406,7 +406,7 @@ def _setup_transform(
 
     def _setup_anomalib_callbacks(self) -> None:
         """Set up callbacks for the trainer."""
-        _callbacks: list[Callback] = []
+        _callbacks: list[Callback] = [RichProgressBar(), RichModelSummary()]
 
         # Add ModelCheckpoint if it is not in the callbacks list.
         has_checkpoint_callback = any(isinstance(c, ModelCheckpoint) for c in self._cache.args["callbacks"])

diff --git a/src/anomalib/loggers/mlflow.py b/src/anomalib/loggers/mlflow.py
@@ -7,12 +7,8 @@
 from lightning.pytorch.utilities import rank_zero_only
 from matplotlib.figure import Figure
 
-from anomalib.utils.exceptions.imports import try_import
-
 from .base import ImageLoggerBase
 
-try_import("mlflow")
-
 
 class AnomalibMLFlowLogger(ImageLoggerBase, MLFlowLogger):
     """Logger for MLFlow.

diff --git a/src/anomalib/metrics/f1_score.py b/src/anomalib/metrics/f1_score.py
@@ -6,7 +6,6 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-
 import logging
 from typing import Any, Literal
 

diff --git a/src/anomalib/models/components/sampling/k_center_greedy.py b/src/anomalib/models/components/sampling/k_center_greedy.py
@@ -8,10 +8,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import torch
-from rich.progress import track
 from torch.nn import functional as F  # noqa: N812
 
 from anomalib.models.components.dimensionality_reduction import SparseRandomProjection
+from anomalib.utils.rich import safe_track
 
 
 class KCenterGreedy:
@@ -98,7 +98,7 @@ def select_coreset_idxs(self, selected_idxs: list[int] | None = None) -> list[in
 
         selected_coreset_idxs: list[int] = []
         idx = int(torch.randint(high=self.n_observations, size=(1,)).item())
-        for _ in track(range(self.coreset_size), description="Selecting Coreset Indices."):
+        for _ in safe_track(sequence=range(self.coreset_size), description="Selecting Coreset Indices."):
             self.update_distances(cluster_centers=[idx])
             idx = self.get_new_idx()
             if idx in selected_idxs:

diff --git a/src/anomalib/pipelines/__init__.py b/src/anomalib/pipelines/__init__.py
@@ -0,0 +1,8 @@
+"""Pipelines for end-to-end usecases."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from .benchmark import Benchmark
+
+__all__ = ["Benchmark"]
diff --git a/src/anomalib/pipelines/benchmark/__init__.py b/src/anomalib/pipelines/benchmark/__init__.py
@@ -0,0 +1,8 @@
+"""Benchmarking."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from .pipeline import Benchmark
+
+__all__ = ["Benchmark"]
diff --git a/src/anomalib/pipelines/benchmark/generator.py b/src/anomalib/pipelines/benchmark/generator.py
@@ -0,0 +1,47 @@
+"""Benchmark job generator."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from collections.abc import Generator
+
+from anomalib.data import get_datamodule
+from anomalib.models import get_model
+from anomalib.pipelines.components import JobGenerator
+from anomalib.pipelines.components.utils import get_iterator_from_grid_dict
+from anomalib.pipelines.types import PREV_STAGE_RESULT
+from anomalib.utils.logging import hide_output
+
+from .job import BenchmarkJob
+
+
+class BenchmarkJobGenerator(JobGenerator):
+    """Generate BenchmarkJob.
+
+    Args:
+        accelerator (str): The accelerator to use.
+    """
+
+    def __init__(self, accelerator: str) -> None:
+        self.accelerator = accelerator
+
+    @property
+    def job_class(self) -> type:
+        """Return the job class."""
+        return BenchmarkJob
+
+    @hide_output
+    def generate_jobs(
+        self,
+        args: dict,
+        previous_stage_result: PREV_STAGE_RESULT,
+    ) -> Generator[BenchmarkJob, None, None]:
+        """Return iterator based on the arguments."""
+        del previous_stage_result  # Not needed for this job
+        for _container in get_iterator_from_grid_dict(args):
+            yield BenchmarkJob(
+                accelerator=self.accelerator,
+                seed=_container["seed"],
+                model=get_model(_container["model"]),
+                datamodule=get_datamodule(_container["data"]),
+            )
diff --git a/src/anomalib/pipelines/benchmark/job.py b/src/anomalib/pipelines/benchmark/job.py
@@ -0,0 +1,108 @@
+"""Benchmarking job."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+from datetime import datetime
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Any
+
+import pandas as pd
+from lightning import seed_everything
+from rich.console import Console
+from rich.table import Table
+
+from anomalib.data import AnomalibDataModule
+from anomalib.engine import Engine
+from anomalib.models import AnomalyModule
+from anomalib.pipelines.components import Job
+from anomalib.utils.logging import hide_output
+
+logger = logging.getLogger(__name__)
+
+
+class BenchmarkJob(Job):
+    """Benchmarking job.
+
+    Args:
+        accelerator (str): The accelerator to use.
+        model (AnomalyModule): The model to use.
+        datamodule (AnomalibDataModule): The data module to use.
+        seed (int): The seed to use.
+    """
+
+    name = "benchmark"
+
+    def __init__(self, accelerator: str, model: AnomalyModule, datamodule: AnomalibDataModule, seed: int) -> None:
+        super().__init__()
+        self.accelerator = accelerator
+        self.model = model
+        self.datamodule = datamodule
+        self.seed = seed
+
+    @hide_output
+    def run(
+        self,
+        task_id: int | None = None,
+    ) -> dict[str, Any]:
+        """Run the benchmark."""
+        devices: str | list[int] = "auto"
+        if task_id is not None:
+            devices = [task_id]
+            logger.info(f"Running job {self.model.__class__.__name__} with device {task_id}")
+        with TemporaryDirectory() as temp_dir:
+            seed_everything(self.seed)
+            engine = Engine(
+                accelerator=self.accelerator,
+                devices=devices,
+                default_root_dir=temp_dir,
+            )
+            engine.fit(self.model, self.datamodule)
+            test_results = engine.test(self.model, self.datamodule)
+        # TODO(ashwinvaidya17): Restore throughput
+        # https://github.com/openvinotoolkit/anomalib/issues/2054
+        output = {
+            "seed": self.seed,
+            "accelerator": self.accelerator,
+            "model": self.model.__class__.__name__,
+            "data": self.datamodule.__class__.__name__,
+            "category": self.datamodule.category,
+            **test_results[0],
+        }
+        logger.info(f"Completed with result {output}")
+        return output
+
+    @staticmethod
+    def collect(results: list[dict[str, Any]]) -> pd.DataFrame:
+        """Gather the results returned from run."""
+        output: dict[str, Any] = {}
+        for key in results[0]:
+            output[key] = []
+        for result in results:
+            for key, value in result.items():
+                output[key].append(value)
+        return pd.DataFrame(output)
+
+    @staticmethod
+    def save(result: pd.DataFrame) -> None:
+        """Save the result to a csv file."""
+        BenchmarkJob._print_tabular_results(result)
+        file_path = Path("runs") / BenchmarkJob.name / datetime.now().strftime("%Y-%m-%d-%H_%M_%S") / "results.csv"
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        result.to_csv(file_path, index=False)
+        logger.info(f"Saved results to {file_path}")
+
+    @staticmethod
+    def _print_tabular_results(gathered_result: pd.DataFrame) -> None:
+        """Print the tabular results."""
+        if gathered_result is not None:
+            console = Console()
+            table = Table(title=f"{BenchmarkJob.name} Results", show_header=True, header_style="bold magenta")
+            _results = gathered_result.to_dict("list")
+            for column in _results:
+                table.add_column(column)
+            for row in zip(*_results.values(), strict=False):
+                table.add_row(*[str(value) for value in row])
+            console.print(table)