From aa76acc8679c0829c82f9eede62fc9a65e088598 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Fri, 24 May 2024 14:26:10 +0100 Subject: [PATCH 1/4] Update `CHANGELOG.md` (#2087) Create changelog --- CHANGELOG.md | 74 ++++++++++++++++++++++++++++++++++------ src/anomalib/__init__.py | 2 +- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c103706da..61842696bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,27 +8,79 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added -- πŸš€ Update OpenVINO and ONNX export to support fixed input shape by @adrianboguszewski in https://github.com/openvinotoolkit/anomalib/pull/2006 -- Add data_path argument to predict entrypoint and add properties for retrieving model path by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/2018 +### Changed + +### Deprecated + +### Fixed + +### New Contributors + +**Full Changelog**: + +## [v1.1.0] + +### Added + +- πŸš€ Add support for MLFlow logger by @DoMaLi94 in https://github.com/openvinotoolkit/anomalib/pull/1847 +- πŸ“š Add Transform behaviour+documentation by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/1953 +- πŸ“š Add documentation on how to use the tiler by @blaz-r in https://github.com/openvinotoolkit/anomalib/pull/1960 +- πŸ’¬ Add Discord badge to `README.md` by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2012 +- πŸš€ Add Auto-Encoder based FRE by @nahuja-intel in https://github.com/openvinotoolkit/anomalib/pull/2025 - πŸš€ Add compression and quantization for OpenVINO export by @adrianboguszewski in https://github.com/openvinotoolkit/anomalib/pull/2052 -- πŸš€from_config API: Create a path between API & configuration file (CLI) by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/2065 +- πŸš€ Add Anomalib Pipelines by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2060 +- πŸš€ Add `from_config` API: Create a 
path between API & configuration file (CLI) by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/2065 +- πŸš€ Add data filter in tar extract by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2084 ### Changed -- WinCLIP: set device in text embedding collection and apply forward pass with no grad, by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/1984 - πŸ”¨ Move all export functionalities to AnomalyModule as base methods by @thinhngo-x in () -- Remove unnecessary jsonargparse dependencies by @davnn in () -- Use default model-specific eval transform when only train_transform specified by @djdameln(https://github.com/djdameln) in () -- πŸ”¨Rename OptimalF1 to F1Max for consistency with the literature, by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1980 -- 🐞Update OptimalF1 score to use BinaryPrecisionRecallCurve and remove num_classes by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1972 +- ⬆️ Update torch and lightning package versions by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1949 +- πŸ”¨ Use default model-specific eval transform when only train_transform specified by @djdameln(https://github.com/djdameln) in () +- πŸ”¨ Replace `@abstractproperty` since it is deprecated by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1964 +- πŸ› οΈ Update OptimalF1 Score by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1972 +- πŸ”¨ Rename OptimalF1 to F1Max for consistency with the literature, by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1980 +- πŸ”¨ WinCLIP: set device in text embedding collection and apply forward pass with no grad, by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/1984 +- πŸ”¨ WinCLIP improvements by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/1985 +- πŸš€ Update OpenVINO and ONNX export to support fixed input shape by @adrianboguszewski in 
https://github.com/openvinotoolkit/anomalib/pull/2006 +- πŸ”¨ Update lightning inference by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/2018 +- ⬆️ Upgrade wandb by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2040 +- πŸ”¨ Refactor Export by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2057 +- ⬆️ Update `pyproject.toml` so `liccheck` can pick the license by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2074 +- ⬆️ Update timm requirement from <=0.9.16,>=0.5.4 to >=0.5.4,<=1.0.3 by @dependabot in https://github.com/openvinotoolkit/anomalib/pull/2075 +- πŸ”¨ Update model `README.md` files by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/2076 ### Deprecated +- πŸ—‘οΈ Remove labeler and update codeowners by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1946 +- πŸ—‘οΈ Remove requirements directory by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1945 +- πŸ—‘οΈ Remove Docker related files by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2039 +- πŸ—‘οΈ Remove references to nightly tests by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2048 +- πŸ—‘οΈ Remove unnecessary jsonargparse dependencies by @davnn in https://github.com/openvinotoolkit/anomalib/pull/2046 + ### Fixed +- 🐞 Fix dimensionality mismatch issue caused by the new kornia version by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1944 +- 🐞 Fix DFM PyTorch inference by @adrianboguszewski in https://github.com/openvinotoolkit/anomalib/pull/1952 +- 🐞 Fix anomaly map shape to also work with tiling by @blaz-r in https://github.com/openvinotoolkit/anomalib/pull/1959 +- 🐞 Fix EfficientAD's pretrained weigths load path by @seyeon923 in https://github.com/openvinotoolkit/anomalib/pull/1966 +- 🐞 fixbug: use BinaryPrecisionRecallCurve instead of PrecisionRecallCurve by @rglkt in 
https://github.com/openvinotoolkit/anomalib/pull/1956 +- 🚨 Hotfix: compute precision recall on raw scores by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1973 +- 🐞 Minor fix to remove input_size from Padim config by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1988 +- 🐞 Fix Reverse Distillation export to ONNX by @adrianboguszewski in https://github.com/openvinotoolkit/anomalib/pull/1990 +- 🐞 Fix DSR training when no GPU by @adrianboguszewski in https://github.com/openvinotoolkit/anomalib/pull/2004 +- 🐞 Fix efficient ad by @abc-125 in https://github.com/openvinotoolkit/anomalib/pull/2015 +- 🐞 Fix keys in data configs to fit AnomalibDataModule parameters by @abc-125 in https://github.com/openvinotoolkit/anomalib/pull/2032 +- 🐞 Fix Export docstring in CLI by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2058 +- 🐞 Fix UFlow links by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/2059 + ### New Contributors -**Full Changelog**: +- @seyeon923 made their first contribution in https://github.com/openvinotoolkit/anomalib/pull/1966 +- @rglkt made their first contribution in https://github.com/openvinotoolkit/anomalib/pull/1956 +- @DoMaLi94 made their first contribution in https://github.com/openvinotoolkit/anomalib/pull/1847 + +**Full Changelog**: https://github.com/openvinotoolkit/anomalib/compare/v1.0.1...v1.1.0 ## [v1.0.1] - 2024-03-27 @@ -124,7 +176,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- πŸ”’ Replace `md5` with `sha-256` by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1680 - πŸ”¨ Refactor Visualisation by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1693 - πŸš€ Replace `albumentations` with `torchvision` transforms by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/1706 -- πŸ’₯ Create a script to upgrade v0.\* configuration format to v1 by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1738 +- πŸ’₯ Create a script to upgrade v0.\- configuration format to v1 by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1738 - πŸ”¨ Refactor type alias by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1742 - πŸ”¨ Remove Lightning dependencies from the CLI and Add `anomalib install` subcommand by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/1748 - πŸ”¨ Refactor `Engine.predict` method by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1772 @@ -376,7 +428,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Configure reference frame for multi-frame video clips () - Bump OpenVINO version to `2022.3.0` () - Remove the dependecy on a specific `torchvision` and `torchmetrics` packages. 
-- Bump PyTorch Lightning version to v.1.9.\* () +- Bump PyTorch Lightning version to v.1.9.\- () - Make input image normalization and center cropping configurable from config (https://github.com/openvinotoolkit/anomalib/pull/822) - Improve flexibility and configurability of subset splitting (https://github.com/openvinotoolkit/anomalib/pull/822) - Switch to new datamodules design (https://github.com/openvinotoolkit/anomalib/pull/822) diff --git a/src/anomalib/__init__.py b/src/anomalib/__init__.py index 711eb023e9..0ca656804e 100644 --- a/src/anomalib/__init__.py +++ b/src/anomalib/__init__.py @@ -5,7 +5,7 @@ from enum import Enum -__version__ = "1.1.0dev" +__version__ = "1.1.0" class LearningType(str, Enum): From f8fa2f23cd93ca95a4986347d85c1226068b4094 Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Thu, 30 May 2024 16:21:17 +0200 Subject: [PATCH 2/4] Add reference guide Signed-off-by: Ashwin Vaidya --- .../guides/reference/models/image/index.md | 2 + .../reference/pipelines/base/generator.md | 9 ++ .../guides/reference/pipelines/base/job.md | 9 ++ .../reference/pipelines/base/pipeline.md | 9 ++ .../pipelines/benchmark/generator.md | 10 ++ .../reference/pipelines/benchmark/index.md | 91 +++++++++++++++++++ .../reference/pipelines/benchmark/job.md | 10 ++ .../guides/reference/pipelines/index.md | 83 +++++++++++++++++ .../reference/pipelines/runners/index.md | 39 ++++++++ .../reference/pipelines/runners/parallel.md | 7 ++ .../reference/pipelines/runners/serial.md | 7 ++ .../pipelines/benchmark/cli_anomalib.txt | 2 + .../pipelines/benchmark/cli_tools.txt | 2 + .../pipelines/components/base/runner.py | 28 +++++- 14 files changed, 306 insertions(+), 2 deletions(-) create mode 100644 docs/source/markdown/guides/reference/pipelines/base/generator.md create mode 100644 docs/source/markdown/guides/reference/pipelines/base/job.md create mode 100644 docs/source/markdown/guides/reference/pipelines/base/pipeline.md create mode 100644 
docs/source/markdown/guides/reference/pipelines/benchmark/generator.md create mode 100644 docs/source/markdown/guides/reference/pipelines/benchmark/index.md create mode 100644 docs/source/markdown/guides/reference/pipelines/benchmark/job.md create mode 100644 docs/source/markdown/guides/reference/pipelines/index.md create mode 100644 docs/source/markdown/guides/reference/pipelines/runners/index.md create mode 100644 docs/source/markdown/guides/reference/pipelines/runners/parallel.md create mode 100644 docs/source/markdown/guides/reference/pipelines/runners/serial.md create mode 100644 docs/source/snippets/pipelines/benchmark/cli_anomalib.txt create mode 100644 docs/source/snippets/pipelines/benchmark/cli_tools.txt diff --git a/docs/source/markdown/guides/reference/models/image/index.md b/docs/source/markdown/guides/reference/models/image/index.md index e52dd24ed9..2229a3b15d 100644 --- a/docs/source/markdown/guides/reference/models/image/index.md +++ b/docs/source/markdown/guides/reference/models/image/index.md @@ -114,6 +114,8 @@ Student-Teacher Feature Pyramid Matching for Unsupervised Anomaly Detection :link-type: doc U-Flow: A U-shaped Normalizing Flow for Anomaly Detection with Unsupervised Threshold +::: + :::{grid-item-card} {material-regular}`model_training;1.5em` WinCLIP :link: ./winclip :link-type: doc diff --git a/docs/source/markdown/guides/reference/pipelines/base/generator.md b/docs/source/markdown/guides/reference/pipelines/base/generator.md new file mode 100644 index 0000000000..0249b06600 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/base/generator.md @@ -0,0 +1,9 @@ +# Generator + +```{eval-rst} +.. 
autoclass:: anomalib.pipelines.components.base.job.JobGenerator + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/base/job.md b/docs/source/markdown/guides/reference/pipelines/base/job.md new file mode 100644 index 0000000000..988516b5a7 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/base/job.md @@ -0,0 +1,9 @@ +# Job + +```{eval-rst} +.. autoclass:: anomalib.pipelines.components.base.job.Job + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/base/pipeline.md b/docs/source/markdown/guides/reference/pipelines/base/pipeline.md new file mode 100644 index 0000000000..dc9f11c743 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/base/pipeline.md @@ -0,0 +1,9 @@ +# Pipeline Base Class + +The `Pipeline` class is the base class for all pipelines. It provides the following methods: + +```{eval-rst} +.. automodule:: anomalib.pipelines.components.base.pipeline + :members: + :show-inheritance: +``` diff --git a/docs/source/markdown/guides/reference/pipelines/benchmark/generator.md b/docs/source/markdown/guides/reference/pipelines/benchmark/generator.md new file mode 100644 index 0000000000..e8e9a0f4ac --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/benchmark/generator.md @@ -0,0 +1,10 @@ +# Benchmark Job Generator + +```{eval-rst} + +.. 
autoclass:: anomalib.pipelines.benchmark.generator.BenchmarkJobGenerator + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/benchmark/index.md b/docs/source/markdown/guides/reference/pipelines/benchmark/index.md new file mode 100644 index 0000000000..93bae8627e --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/benchmark/index.md @@ -0,0 +1,91 @@ +# Benchmarking Pipeline + +The benchmarking pipeline allows you to run multiple models across combination of parameters and dataset categories to collect metrics. The benchmarking run is configured using a config file that specifies the grid-search parameters. A sample config file is shown below: + +```yaml +accelerator: + - cuda + - cpu +benchmark: + seed: 42 + model: + class_path: + grid_search: [Padim, Patchcore] + data: + class_path: MVTec + init_args: + category: + grid: + - bottle + - cable + - capsule +``` + +The `accelerator` parameter is specific to the pipeline and is used to configure the runners. When `cuda` is passed it adds a [parallel](../runners/parallel.md) runner with number of jobs equal to the number of cuda devices. The idea is that since job is independent, we can increase the throughput by distributing each on an individual accelerator. The `cpu` jobs are run [serially](../runners/serial.md). + +## Running the Benchmark Pipeline + +There are two ways to run the benchmark pipeline; as a subcommand, or as a standalone entrypoint. + +:::::{dropdown} CLI +:icon: code + +::::{tab-set} +:::{tab-item} Anomalib subcommand +:sync: label-1 + +```{literalinclude} ../../../../../snippets/pipelines/benchmark/cli_anomalib.txt +:language: bash +``` + +::: + +:::{tab-item} Standalone entrypoint +:sync: label-2 + +```{literalinclude} ../../../../../snippets/pipelines/benchmark/cli_tools.txt +:language: bash +``` + +::: + +::::: + +## Benchmark Pipeline Class + +```{eval-rst} + +.. 
autoclass:: anomalib.pipelines.benchmark.pipeline.Benchmark + :members: + :inherited-members: + :show-inheritance: + +``` + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} Job +:link: ./job +:link-type: doc + +Benchmark Job +::: + +:::{grid-item-card} Generator +:link: ./generator +:link-type: doc + +Benchmark Job Generator +::: + +:::: + +```{toctree} +:caption: Benchmark +:hidden: + +./job +./generator +``` diff --git a/docs/source/markdown/guides/reference/pipelines/benchmark/job.md b/docs/source/markdown/guides/reference/pipelines/benchmark/job.md new file mode 100644 index 0000000000..d445c6c931 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/benchmark/job.md @@ -0,0 +1,10 @@ +# Benchmark Job + +```{eval-rst} + +.. autoclass:: anomalib.pipelines.benchmark.job.BenchmarkJob + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/index.md b/docs/source/markdown/guides/reference/pipelines/index.md new file mode 100644 index 0000000000..09537aa39c --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/index.md @@ -0,0 +1,83 @@ +# Pipelines + +```{danger} +The pipelines feature is experimental and might be changed without backward compatibility. +``` + +## Introduction + +Tasks such as Benchmarking, Ensemble Tiling, and Hyper-parameter optimization requires running multiple models and chaining multiple stages together. The pipelines feature provides a way to define and run such tasks. Each part of the pipeline is designed to be independent and composable so that they can be reused across different pipelines. + +## Terminology + +- **Pipeline**: Pipeline is the main entity that defines the sequence of [jobs](./base/job.md) to be executed. It is responsible for creating and running the jobs. The job itself is generated using a [job generator](./base/generator.md). And, these are chained using a [runner](./runners/index.md). 
+ +- **Runner**: A runner is responsible for scheduling and running the jobs. It also passes the output of the previous job, if available. It also calls the right hooks to gather and save the results from the jobs and passes the gathered results to the next runner. + +- **Job Generator**: The job generator is responsible for generating jobs based on the configuration. It is used by the runner to create jobs. + +- **Job**: A job is an atomic unit of work that can be run independently. It is responsible for running a single task. For example, training a model or computing metrics. The idea behind this is to ensure that it can be attached to any runner without making changes to the job itself. This is useful when you want to distribute the jobs to increase the throughput of your pipeline. + +```{admonition} Detailed Walkthrough +:class: tip +For more clarity on creating a custom pipeline, refer to the [How-To Guide](../../how_to/pipelines/index.md). +``` + +## Base classes + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} {octicon}`workflow` Pipeline +:link: ./base/pipeline +:link-type: doc + +Base class for pipeline. +::: + +:::{grid-item-card} {octicon}`file` Job +:link: ./base/job +:link-type: doc + +Base class for job. +::: + +:::{grid-item-card} {octicon}`iterations` Job Generator +:link: ./base/generator +:link-type: doc + +Base class for job generator. +::: + +:::{grid-item-card} {octicon}`play` Runner +:link: ./runners/index +:link-type: doc + +Base class for runner. +::: + +:::: + +## Available Pipelines + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} {octicon}`number` Benchmarking +:link: ./benchmark/index +:link-type: doc + +Compute metrics across models using a grid-search. 
+::: + +:::: + +```{toctree} +:caption: Pipelines +:hidden: + +./benchmark/index +./runners/index +``` diff --git a/docs/source/markdown/guides/reference/pipelines/runners/index.md b/docs/source/markdown/guides/reference/pipelines/runners/index.md new file mode 100644 index 0000000000..374eec4852 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/runners/index.md @@ -0,0 +1,39 @@ +# Runner + +```{eval-rst} +.. autoclass:: anomalib.pipelines.components.base.runner.Runner + :members: + +``` + +## Available Runners + +Anomalib provides a few runners that can be used in your pipelines. + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} {octicon}`list-ordered` Serial Runner +:link: ./serial +:link-type: doc + +Runner for serial jobs. +::: + +:::{grid-item-card} {octicon}`git-branch` Parallel Runner +:link: ./parallel +:link-type: doc + +Runner for parallel jobs. +::: + +:::: + +```{toctree} +:caption: Runners +:hidden: + +./serial +./parallel +``` diff --git a/docs/source/markdown/guides/reference/pipelines/runners/parallel.md b/docs/source/markdown/guides/reference/pipelines/runners/parallel.md new file mode 100644 index 0000000000..9e1c9e83b2 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/runners/parallel.md @@ -0,0 +1,7 @@ +# Parallel Runner + +```{eval-rst} +.. automodule:: anomalib.pipelines.components.runners.parallel + :members: + :show-inheritance: +``` diff --git a/docs/source/markdown/guides/reference/pipelines/runners/serial.md b/docs/source/markdown/guides/reference/pipelines/runners/serial.md new file mode 100644 index 0000000000..d9fdb89e32 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/runners/serial.md @@ -0,0 +1,7 @@ +# Serial Runner + +```{eval-rst} +.. 
automodule:: anomalib.pipelines.components.runners.serial + :members: + :show-inheritance: +``` diff --git a/docs/source/snippets/pipelines/benchmark/cli_anomalib.txt b/docs/source/snippets/pipelines/benchmark/cli_anomalib.txt new file mode 100644 index 0000000000..e28dc5cb22 --- /dev/null +++ b/docs/source/snippets/pipelines/benchmark/cli_anomalib.txt @@ -0,0 +1,2 @@ +# Using Anomalib entrypoint +anomalib benchmark --config tools/experimental/benchmarking/sample.yaml diff --git a/docs/source/snippets/pipelines/benchmark/cli_tools.txt b/docs/source/snippets/pipelines/benchmark/cli_tools.txt new file mode 100644 index 0000000000..df86e64ac3 --- /dev/null +++ b/docs/source/snippets/pipelines/benchmark/cli_tools.txt @@ -0,0 +1,2 @@ +# Using Entrypoint in tools +python tools/experimental/benchmarking/benchmark.py --config tools/experimental/benchmarking/sample.yaml diff --git a/src/anomalib/pipelines/components/base/runner.py b/src/anomalib/pipelines/components/base/runner.py index 54fa0c5f31..cee46dfacb 100644 --- a/src/anomalib/pipelines/components/base/runner.py +++ b/src/anomalib/pipelines/components/base/runner.py @@ -11,11 +11,35 @@ class Runner(ABC): - """Base runner.""" + """Base runner. + + Args: + generator (JobGenerator): Job generator. + """ def __init__(self, generator: JobGenerator) -> None: self.generator = generator @abstractmethod def run(self, args: dict, prev_stage_results: PREV_STAGE_RESULT = None) -> GATHERED_RESULTS: - """Run the pipeline.""" + """Run the pipeline. + + Args: + args (dict): Arguments specific to the job. For example, if there is a pipeline defined where one of the job + generators is hyperparameter optimization, then the pipeline configuration file will look something like + ```yaml + arg1: + arg2: + hpo: + param1: + param2: + ... + ``` + In this case, the `args` will receive a dictionary with all keys under `hpo`. + + prev_stage_results (PREV_STAGE_RESULT, optional): Previous stage results. 
This is useful when the current + stage depends on the results of the previous stage. Defaults to None. + + Returns: + GATHERED_RESULTS: Gathered results from all the jobs. + """ From 74730907b9eee300aef04a7ed26174f3137bdc2e Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Fri, 31 May 2024 14:31:40 +0200 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=93=9A=20Add=20Pipelines=20Documentat?= =?UTF-8?q?ion=20(#2096)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add reference guide Signed-off-by: Ashwin Vaidya --- .../guides/reference/models/image/index.md | 2 + .../reference/pipelines/base/generator.md | 9 ++ .../guides/reference/pipelines/base/job.md | 9 ++ .../reference/pipelines/base/pipeline.md | 9 ++ .../pipelines/benchmark/generator.md | 10 ++ .../reference/pipelines/benchmark/index.md | 91 +++++++++++++++++++ .../reference/pipelines/benchmark/job.md | 10 ++ .../guides/reference/pipelines/index.md | 83 +++++++++++++++++ .../reference/pipelines/runners/index.md | 39 ++++++++ .../reference/pipelines/runners/parallel.md | 7 ++ .../reference/pipelines/runners/serial.md | 7 ++ .../pipelines/benchmark/cli_anomalib.txt | 2 + .../pipelines/benchmark/cli_tools.txt | 2 + .../pipelines/components/base/runner.py | 28 +++++- 14 files changed, 306 insertions(+), 2 deletions(-) create mode 100644 docs/source/markdown/guides/reference/pipelines/base/generator.md create mode 100644 docs/source/markdown/guides/reference/pipelines/base/job.md create mode 100644 docs/source/markdown/guides/reference/pipelines/base/pipeline.md create mode 100644 docs/source/markdown/guides/reference/pipelines/benchmark/generator.md create mode 100644 docs/source/markdown/guides/reference/pipelines/benchmark/index.md create mode 100644 docs/source/markdown/guides/reference/pipelines/benchmark/job.md create mode 100644 docs/source/markdown/guides/reference/pipelines/index.md create mode 100644 docs/source/markdown/guides/reference/pipelines/runners/index.md create 
mode 100644 docs/source/markdown/guides/reference/pipelines/runners/parallel.md create mode 100644 docs/source/markdown/guides/reference/pipelines/runners/serial.md create mode 100644 docs/source/snippets/pipelines/benchmark/cli_anomalib.txt create mode 100644 docs/source/snippets/pipelines/benchmark/cli_tools.txt diff --git a/docs/source/markdown/guides/reference/models/image/index.md b/docs/source/markdown/guides/reference/models/image/index.md index e52dd24ed9..2229a3b15d 100644 --- a/docs/source/markdown/guides/reference/models/image/index.md +++ b/docs/source/markdown/guides/reference/models/image/index.md @@ -114,6 +114,8 @@ Student-Teacher Feature Pyramid Matching for Unsupervised Anomaly Detection :link-type: doc U-Flow: A U-shaped Normalizing Flow for Anomaly Detection with Unsupervised Threshold +::: + :::{grid-item-card} {material-regular}`model_training;1.5em` WinCLIP :link: ./winclip :link-type: doc diff --git a/docs/source/markdown/guides/reference/pipelines/base/generator.md b/docs/source/markdown/guides/reference/pipelines/base/generator.md new file mode 100644 index 0000000000..0249b06600 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/base/generator.md @@ -0,0 +1,9 @@ +# Generator + +```{eval-rst} +.. autoclass:: anomalib.pipelines.components.base.job.JobGenerator + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/base/job.md b/docs/source/markdown/guides/reference/pipelines/base/job.md new file mode 100644 index 0000000000..988516b5a7 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/base/job.md @@ -0,0 +1,9 @@ +# Job + +```{eval-rst} +.. 
autoclass:: anomalib.pipelines.components.base.job.Job + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/base/pipeline.md b/docs/source/markdown/guides/reference/pipelines/base/pipeline.md new file mode 100644 index 0000000000..dc9f11c743 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/base/pipeline.md @@ -0,0 +1,9 @@ +# Pipeline Base Class + +The `Pipeline` class is the base class for all pipelines. It provides the following methods: + +```{eval-rst} +.. automodule:: anomalib.pipelines.components.base.pipeline + :members: + :show-inheritance: +``` diff --git a/docs/source/markdown/guides/reference/pipelines/benchmark/generator.md b/docs/source/markdown/guides/reference/pipelines/benchmark/generator.md new file mode 100644 index 0000000000..e8e9a0f4ac --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/benchmark/generator.md @@ -0,0 +1,10 @@ +# Benchmark Job Generator + +```{eval-rst} + +.. autoclass:: anomalib.pipelines.benchmark.generator.BenchmarkJobGenerator + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/benchmark/index.md b/docs/source/markdown/guides/reference/pipelines/benchmark/index.md new file mode 100644 index 0000000000..93bae8627e --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/benchmark/index.md @@ -0,0 +1,91 @@ +# Benchmarking Pipeline + +The benchmarking pipeline allows you to run multiple models across combination of parameters and dataset categories to collect metrics. The benchmarking run is configured using a config file that specifies the grid-search parameters. 
A sample config file is shown below: + +```yaml +accelerator: + - cuda + - cpu +benchmark: + seed: 42 + model: + class_path: + grid_search: [Padim, Patchcore] + data: + class_path: MVTec + init_args: + category: + grid: + - bottle + - cable + - capsule +``` + +The `accelerator` parameter is specific to the pipeline and is used to configure the runners. When `cuda` is passed it adds a [parallel](../runners/parallel.md) runner with number of jobs equal to the number of cuda devices. The idea is that since job is independent, we can increase the throughput by distributing each on an individual accelerator. The `cpu` jobs are run [serially](../runners/serial.md). + +## Running the Benchmark Pipeline + +There are two ways to run the benchmark pipeline; as a subcommand, or as a standalone entrypoint. + +:::::{dropdown} CLI +:icon: code + +::::{tab-set} +:::{tab-item} Anomalib subcommand +:sync: label-1 + +```{literalinclude} ../../../../../snippets/pipelines/benchmark/cli_anomalib.txt +:language: bash +``` + +::: + +:::{tab-item} Standalone entrypoint +:sync: label-2 + +```{literalinclude} ../../../../../snippets/pipelines/benchmark/cli_tools.txt +:language: bash +``` + +::: + +::::: + +## Benchmark Pipeline Class + +```{eval-rst} + +.. 
autoclass:: anomalib.pipelines.benchmark.pipeline.Benchmark + :members: + :inherited-members: + :show-inheritance: + +``` + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} Job +:link: ./job +:link-type: doc + +Benchmark Job +::: + +:::{grid-item-card} Generator +:link: ./generator +:link-type: doc + +Benchmark Job Generator +::: + +:::: + +```{toctree} +:caption: Benchmark +:hidden: + +./job +./generator +``` diff --git a/docs/source/markdown/guides/reference/pipelines/benchmark/job.md b/docs/source/markdown/guides/reference/pipelines/benchmark/job.md new file mode 100644 index 0000000000..d445c6c931 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/benchmark/job.md @@ -0,0 +1,10 @@ +# Benchmark Job + +```{eval-rst} + +.. autoclass:: anomalib.pipelines.benchmark.job.BenchmarkJob + :members: + :inherited-members: + :show-inheritance: + +``` diff --git a/docs/source/markdown/guides/reference/pipelines/index.md b/docs/source/markdown/guides/reference/pipelines/index.md new file mode 100644 index 0000000000..09537aa39c --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/index.md @@ -0,0 +1,83 @@ +# Pipelines + +```{danger} +The pipelines feature is experimental and might be changed without backward compatibility. +``` + +## Introduction + +Tasks such as Benchmarking, Ensemble Tiling, and Hyper-parameter optimization requires running multiple models and chaining multiple stages together. The pipelines feature provides a way to define and run such tasks. Each part of the pipeline is designed to be independent and composable so that they can be reused across different pipelines. + +## Terminology + +- **Pipeline**: Pipeline is the main entity that defines the sequence of [jobs](./base/job.md) to be executed. It is responsible for creating and running the jobs. The job itself is generated using a [job generator](./base/generator.md). And, these are chained using a [runner](./runners/index.md). 
+ +- **Runner**: A runner is responsible for scheduling and running the jobs. It also passes the output of the previous job, if available. It also calls the right hooks to gather and save the results from the jobs and passes the gathered results to the next runner. + +- **Job Generator**: The job generator is responsible for generating jobs based on the configuration. It is used by the runner to create jobs. + +- **Job**: A job is an atomic unit of work that can be run independently. It is responsible for running a single task. For example, training a model or computing metrics. The idea behind this is to ensure that it can be attached to any runner without making changes to the job itself. This is useful when you want to distribute the jobs to increase the throughput of your pipeline. + +```{admonition} Detailed Walkthrough +:class: tip +For more clarity on creating a custom pipeline, refer to the [How-To Guide](../../how_to/pipelines/index.md). +``` + +## Base classes + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} {octicon}`workflow` Pipeline +:link: ./base/pipeline +:link-type: doc + +Base class for pipeline. +::: + +:::{grid-item-card} {octicon}`file` Job +:link: ./base/job +:link-type: doc + +Base class for job. +::: + +:::{grid-item-card} {octicon}`iterations` Job Generator +:link: ./base/generator +:link-type: doc + +Base class for job generator. +::: + +:::{grid-item-card} {octicon}`play` Runner +:link: ./runners/index +:link-type: doc + +Base class for runner. +::: + +:::: + +## Available Pipelines + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} {octicon}`number` Benchmarking +:link: ./benchmark/index +:link-type: doc + +Compute metrics across models using a grid-search. 
+::: + +:::: + +```{toctree} +:caption: Pipelines +:hidden: + +./benchmark/index +./runners/index +``` diff --git a/docs/source/markdown/guides/reference/pipelines/runners/index.md b/docs/source/markdown/guides/reference/pipelines/runners/index.md new file mode 100644 index 0000000000..374eec4852 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/runners/index.md @@ -0,0 +1,39 @@ +# Runner + +```{eval-rst} +.. autoclass:: anomalib.pipelines.components.base.runner.Runner + :members: + +``` + +## Available Runners + +Anomalib provides a few runners that can be used in your pipelines. + +::::{grid} +:margin: 1 1 0 0 +:gutter: 1 + +:::{grid-item-card} {octicon}`list-ordered` Serial Runner +:link: ./serial +:link-type: doc + +Runner for serial jobs. +::: + +:::{grid-item-card} {octicon}`git-branch` Parallel Runner +:link: ./parallel +:link-type: doc + +Runner for parallel jobs. +::: + +:::: + +```{toctree} +:caption: Runners +:hidden: + +./serial +./parallel +``` diff --git a/docs/source/markdown/guides/reference/pipelines/runners/parallel.md b/docs/source/markdown/guides/reference/pipelines/runners/parallel.md new file mode 100644 index 0000000000..9e1c9e83b2 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/runners/parallel.md @@ -0,0 +1,7 @@ +# Parallel Runner + +```{eval-rst} +.. automodule:: anomalib.pipelines.components.runners.parallel + :members: + :show-inheritance: +``` diff --git a/docs/source/markdown/guides/reference/pipelines/runners/serial.md b/docs/source/markdown/guides/reference/pipelines/runners/serial.md new file mode 100644 index 0000000000..d9fdb89e32 --- /dev/null +++ b/docs/source/markdown/guides/reference/pipelines/runners/serial.md @@ -0,0 +1,7 @@ +# Serial Runner + +```{eval-rst} +.. 
automodule:: anomalib.pipelines.components.runners.serial + :members: + :show-inheritance: +``` diff --git a/docs/source/snippets/pipelines/benchmark/cli_anomalib.txt b/docs/source/snippets/pipelines/benchmark/cli_anomalib.txt new file mode 100644 index 0000000000..e28dc5cb22 --- /dev/null +++ b/docs/source/snippets/pipelines/benchmark/cli_anomalib.txt @@ -0,0 +1,2 @@ +# Using Anomalib entrypoint +anomalib benchmark --config tools/experimental/benchmarking/sample.yaml diff --git a/docs/source/snippets/pipelines/benchmark/cli_tools.txt b/docs/source/snippets/pipelines/benchmark/cli_tools.txt new file mode 100644 index 0000000000..df86e64ac3 --- /dev/null +++ b/docs/source/snippets/pipelines/benchmark/cli_tools.txt @@ -0,0 +1,2 @@ +# Using Entrypoint in tools +python tools/experimental/benchmarking/benchmark.py --config tools/experimental/benchmarking/sample.yaml diff --git a/src/anomalib/pipelines/components/base/runner.py b/src/anomalib/pipelines/components/base/runner.py index 54fa0c5f31..cee46dfacb 100644 --- a/src/anomalib/pipelines/components/base/runner.py +++ b/src/anomalib/pipelines/components/base/runner.py @@ -11,11 +11,35 @@ class Runner(ABC): - """Base runner.""" + """Base runner. + + Args: + generator (JobGenerator): Job generator. + """ def __init__(self, generator: JobGenerator) -> None: self.generator = generator @abstractmethod def run(self, args: dict, prev_stage_results: PREV_STAGE_RESULT = None) -> GATHERED_RESULTS: - """Run the pipeline.""" + """Run the pipeline. + + Args: + args (dict): Arguments specific to the job. For example, if there is a pipeline defined where one of the job + generators is hyperparameter optimization, then the pipeline configuration file will look something like + ```yaml + arg1: + arg2: + hpo: + param1: + param2: + ... + ``` + In this case, the `args` will receive a dictionary with all keys under `hpo`. + + prev_stage_results (PREV_STAGE_RESULT, optional): Previous stage results. 
This is useful when the current + stage depends on the results of the previous stage. Defaults to None. + + Returns: + GATHERED_RESULTS: Gathered results from all the jobs. + """ From 6ef356894381fcb1d9457b7decf419a1a669ce36 Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Tue, 4 Jun 2024 11:54:36 +0200 Subject: [PATCH 4/4] Add how to guide Signed-off-by: Ashwin Vaidya --- docs/source/markdown/guides/how_to/index.md | 3 + .../markdown/guides/how_to/pipelines/index.md | 254 ++++++++++++++++++ .../reference/pipelines/runners/parallel.md | 4 + .../snippets/pipelines/dummy/anomalib_cli.txt | 1 + .../pipelines/dummy/pipeline_parallel.txt | 9 + .../pipelines/dummy/pipeline_serial.txt | 9 + .../pipelines/dummy/significance_job.txt | 48 ++++ .../dummy/significance_job_generator.txt | 16 ++ .../pipelines/dummy/src_dir_structure.txt | 7 + .../pipelines/dummy/tools_dir_structure.txt | 4 + .../pipelines/dummy/train_generator.txt | 19 ++ .../snippets/pipelines/dummy/train_job.txt | 31 +++ 12 files changed, 405 insertions(+) create mode 100644 docs/source/markdown/guides/how_to/pipelines/index.md create mode 100644 docs/source/snippets/pipelines/dummy/anomalib_cli.txt create mode 100644 docs/source/snippets/pipelines/dummy/pipeline_parallel.txt create mode 100644 docs/source/snippets/pipelines/dummy/pipeline_serial.txt create mode 100644 docs/source/snippets/pipelines/dummy/significance_job.txt create mode 100644 docs/source/snippets/pipelines/dummy/significance_job_generator.txt create mode 100644 docs/source/snippets/pipelines/dummy/src_dir_structure.txt create mode 100644 docs/source/snippets/pipelines/dummy/tools_dir_structure.txt create mode 100644 docs/source/snippets/pipelines/dummy/train_generator.txt create mode 100644 docs/source/snippets/pipelines/dummy/train_job.txt diff --git a/docs/source/markdown/guides/how_to/index.md b/docs/source/markdown/guides/how_to/index.md index 87515147be..a8eb3fca22 100644 --- a/docs/source/markdown/guides/how_to/index.md +++ 
b/docs/source/markdown/guides/how_to/index.md @@ -57,6 +57,8 @@ Learn more about anomalib's deployment capabilities ::: :::{grid-item-card} {octicon}`workflow` Pipelines +:link: ./pipelines/index +:link-type: doc Learn more about anomalib hpo, sweep and benchmarking pipelines ::: @@ -69,4 +71,5 @@ Learn more about anomalib hpo, sweep and benchmarking pipelines ./data/index ./models/index +./pipelines/index ``` diff --git a/docs/source/markdown/guides/how_to/pipelines/index.md b/docs/source/markdown/guides/how_to/pipelines/index.md new file mode 100644 index 0000000000..ed3d66f81d --- /dev/null +++ b/docs/source/markdown/guides/how_to/pipelines/index.md @@ -0,0 +1,254 @@ +# Pipelines + +This guide demonstrates how to create a [Pipeline](../../reference/pipelines/index.md) for your custom task. + +A pipeline is made up of runners. These runners are responsible for running a single type of job. A job is the smallest unit of work that is independent, such as, training a model or statistical comparison of the outputs of two models. Each job should be designed to be independent of other jobs so that they are agnostic to the runner that is running them. This ensures that the job can be run in parallel or serially without any changes to the job itself. The runner does not directly instantiate a job but rather has a job generator that generates the job based on the configuration. This generator is responsible for parsing the config and generating the job. + +## Birds Eye View + +In this guide we are going to create a dummy significant parameter search pipeline. The pipeline will have two jobs. The first job trains a model and computes the metric. The second job computes the significance of the parameters to the final score using shapely values. The final output of the pipeline is a plot that shows the contribution of each parameter to the final score. This will help teach you how to create a pipeline, a job, a job generator, and how to expose it to the `anomalib` CLI. 
The pipeline is going to be named `experiment`. So by the end of this you will be able to generate a significance plot using
+
+```{literalinclude} ../../../../snippets/pipelines/dummy/anomalib_cli.txt
+:language: bash
+```
+
+The final directory structure will look as follows:
+
+```{literalinclude} ../../../../snippets/pipelines/dummy/src_dir_structure.txt
+
+```
+
+```{literalinclude} ../../../../snippets/pipelines/dummy/tools_dir_structure.txt
+:language: bash
+```
+
+## Creating the Jobs
+
+Let's first look at the base class for the [jobs](../../reference/pipelines/base/job.md). It has a few methods defined.
+
+- The `run` method is the main method that is called by the runner. This is where we will train the model and return the model metrics.
+- The `collect` method is used to gather the results from all the runs and collate them. This is handy as we want to pass a single object to the next job that contains details of all the runs including the final score.
+- The `save` method is used to write any artifacts to the disk. It accepts the gathered results as a parameter. This is useful in a variety of situations. Say, when we want to write the results in a csv file or write the raw anomaly maps for further processing.
+
+Let's create the first job that trains the model and computes the metric. Since it is a dummy example, we will just return a random number as the metric.
+
+```python
+class TrainJob(Job):
+    name = "train"
+
+    def __init__(self, lr: float, backbone: str, stride: int):
+        self.lr = lr
+        self.backbone = backbone
+        self.stride = stride
+
+    def run(self, task_id: int | None = None) -> dict:
+        print(f"Training with lr: {self.lr}, backbone: {self.backbone}, stride: {self.stride}")
+        time.sleep(2)
+        score = np.random.uniform(0.7, 1.0)
+        return {"lr": self.lr, "backbone": self.backbone, "stride": self.stride, "score": score}
+```
+
+Ignore the `task_id` for now. It is used for parallel jobs. We will come back to it later.
+ +````{note} +The `name` attribute is important and is used to identify the arguments in the job config file. +So, in our case the config `yaml` file will contain an entry like this: + +```yaml +... +train: + lr: + backbone: + stride: +... +```` + +Of course, it is up to us to choose what parameters should be shown under the `train` key. + +Let's also add the `collect` method so that we return a nice dict object that can be used by the next job. + +```python +def collect(results: list[dict]) -> dict: + output: dict = {} + for key in results[0]: + output[key] = [] + for result in results: + for key, value in result.items(): + output[key].append(value) + return output +``` + +We can also define a `save` method that writes the dictionary as a csv file. + +```python +@staticmethod +def save(results: dict) -> None: + """Save results in a csv file.""" + results_df = pd.DataFrame(results) + file_path = Path("runs") / TrainJob.name + file_path.mkdir(parents=True, exist_ok=True) + results_df.to_csv(file_path / "results.csv", index=False) +``` + +The entire job class is shown below. + +```{literalinclude} ../../../../snippets/pipelines/dummy/train_job.txt +:language: python +``` + +Now we need a way to generate this job when the pipeline is run. To do this we need to subclass the [JobGenerator](../../reference/pipelines/base/generator.md) class. + +The job generator is the actual object that is attached to a runner and is responsible for parsing the configuration and generating jobs. It has two methods that need to be implemented. + +- `generate_job`: This method accepts the configuration as a dictionary and, optionally, the results of the previous job. For the train job, we don't need results for previous jobs, so we will ignore it. +- `job_class`: This holds the reference to the class of the job that the generator will yield. 
It is used to inform the runner about the job that is being run, and is used to access the static attributes of the job such as its name, collect method, etc. + +Let's first start by defining the configuration that the generator will accept. The train job requires three parameters: `lr`, `backbone`, and `stride`. We will also add another parameter that defines the number of experiments we want to run. One way to define it would be as follows: + +```yaml +train: + experiments: 10 + lr: [0.1, 0.99] + backbone: + - resnet18 + - wide_resnet50 + stride: + - 3 + - 5 +``` + +For this example the specification is defined as follows. + +1. The number of experiments is set to 10. +2. Learning rate is sampled from a uniform distribution in the range `[0.1, 0.99]`. +3. The backbone is chosen from the list `["resnet18", "wide_resnet50"]`. +4. The stride is chosen from the list `[3, 5]`. + +```{note} +While the `[ ]` and `-` syntax in `yaml` both signify a list, for visual disambiguation this example uses `[ ]` to denote closed interval and `-` for a list of options. +``` + +With this defined, we can define the generator class as follows. + +```{literalinclude} ../../../../snippets/pipelines/dummy/train_generator.txt +:language: python +``` + +Since this is a dummy example, we generate the next experiment randomly. In practice, you would use a more sophisticated method that relies on your validation metrics to generate the next experiment. + +```{admonition} Challenge +:class: tip +For a challenge define your own configuration and a generator to parse that configuration. +``` + +Okay, so now we can train the model. We still need a way to find out which parameters contribute the most to the final score. We will do this by computing the shapely values to find out the contribution of each parameter to the final score. 
+
+Let's first start by adding the library to our environment:
+
+```bash
+pip install shap
+```
+
+The following listing shows the job that computes the Shapley values and saves a plot that shows the contribution of each parameter to the final score. A quick rundown without going into the details of the job (as it is irrelevant to the pipeline) is as follows. We create a `RandomForestRegressor` that is trained on the parameters to predict the final score. We then compute the Shapley values to identify the parameters that have the most significant impact on the model performance. Finally, the `save` method saves the plot so we can visually inspect the results.
+
+```{literalinclude} ../../../../snippets/pipelines/dummy/significance_job.txt
+
+```
+
+Great! Now we have the job; as before, we need the generator. Since we only need the results from the previous stage, we don't need to define the config. Let's quickly write that as well.
+
+```{literalinclude} ../../../../snippets/pipelines/dummy/significance_job_generator.txt
+
+```
+
+## Experiment Pipeline
+
+So now we have the jobs, and a way to generate them. Let's look at how we can chain them together to achieve what we want. We will use the [Pipeline](../../reference/pipelines/base/pipeline.md) class to define the pipeline.
+
+When creating a custom pipeline, there is only one important method that we need to implement. That is the `_setup_runners` method. This is where we chain the runners together.
+
+```{literalinclude} ../../../../snippets/pipelines/dummy/pipeline_serial.txt
+:language: python
+```
+
+In this example we use `SerialRunner` for running each job. It is a simple runner that runs the jobs in a serial manner. For more information on `SerialRunner` look [here](../../reference/pipelines/runners/serial.md).
+
+Okay, so we have the pipeline. How do we run it? To do this let's create a simple entrypoint in the `tools` folder of Anomalib.
+
+Here is how the directory looks.
+ +```{literalinclude} ../../../../snippets/pipelines/dummy/tools_dir_structure.txt +:language: bash +``` + +As you can see, we have the `config.yaml` file in the same directory. Let's quickly populate `experiment.py`. + +```python +from anomalib.pipelines.experiment_pipeline import ExperimentPipeline + +if __name__ == "__main__": + ExperimentPipeline().run() +``` + +Alright! Time to take it on the road. + +```bash +python tools/experimental/experiment/experiment.py --config tools/experimental/experiment/config.yaml +``` + +If all goes well you should see the summary plot in `runs/significant_feature/summary_plot.png`. + +## Exposing to the CLI + +Now that you have your shiny new pipeline, you can expose it as a subcommand to `anomalib` by adding an entry to the pipeline registry in `anomalib/cli/pipelines.py`. + +```python +if try_import("anomalib.pipelines"): + ... + from anomalib.pipelines import ExperimentPipeline + +PIPELINE_REGISTRY: dict[str, type[Pipeline]] | None = { + "experiment": ExperimentPipeline, + ... +} +``` + +With this you can now call + +```{literalinclude} ../../../../snippets/pipelines/dummy/anomalib_cli.txt +:language: bash +``` + +Congratulations! You have successfully created a pipeline that trains a model and computes the significance of the parameters to the final score πŸŽ‰ + +```{admonition} Challenge +:class: tip +This example used a random model hence the scores were meaningless. Try to implement a real model and compute the scores. Look into which parameters lead to the most significant contribution to your score. +``` + +## Final Tweaks + +Before we end, let's look at a few final tweaks that you can make to the pipeline. + +First, let's run the initial model training in parallel. Since all jobs are independent, we can use the [ParallelRunner](../../reference/pipelines/runners/parallel.md). Since the `TrainJob` is a dummy job in this example, the pool of parallel jobs is set to the number of experiments. 
+
+```{literalinclude} ../../../../snippets/pipelines/dummy/pipeline_parallel.txt
+
+```
+
+You now notice that the entire pipeline takes less time to run. This is handy when you have a large number of experiments, and when each job takes substantial time to run.
+
+Now on to the second one. When running the pipeline we don't want our terminal cluttered with the outputs from each run. Anomalib provides a handy decorator that temporarily hides the output of a function. It suppresses all outputs to the standard out and the standard error unless an exception is raised. Let's add this to the `TrainJob`.
+
+```python
+from anomalib.utils.logging import hide_output
+
+class TrainJob(Job):
+    ...
+
+    @hide_output
+    def run(self, task_id: int | None = None) -> dict:
+        ...
+```
+
+You will no longer see the output of the `print` statement in the `TrainJob` method in the terminal.
diff --git a/docs/source/markdown/guides/reference/pipelines/runners/parallel.md b/docs/source/markdown/guides/reference/pipelines/runners/parallel.md
index 9e1c9e83b2..145e6f5015 100644
--- a/docs/source/markdown/guides/reference/pipelines/runners/parallel.md
+++ b/docs/source/markdown/guides/reference/pipelines/runners/parallel.md
@@ -1,5 +1,9 @@
 # Parallel Runner
 
+The parallel runner creates a pool of processes, with the pool size equal to the number defined when creating the runner.
+
+Each process in the pool has a process id assigned to it that is between 0 and `n_jobs`. When a job is run using the parallel runner, the process id is passed to the job. The job can use this id to determine process-specific logic. For example, if the pool size is equal to the number of GPUs, the job can use the process id to assign a specific GPU to the process.
+
 ```{eval-rst}
 .. 
automodule:: anomalib.pipelines.components.runners.parallel :members: diff --git a/docs/source/snippets/pipelines/dummy/anomalib_cli.txt b/docs/source/snippets/pipelines/dummy/anomalib_cli.txt new file mode 100644 index 0000000000..3ce5250fbe --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/anomalib_cli.txt @@ -0,0 +1 @@ +anomalib experiment --config tools/experimental/experiment/config.yaml diff --git a/docs/source/snippets/pipelines/dummy/pipeline_parallel.txt b/docs/source/snippets/pipelines/dummy/pipeline_parallel.txt new file mode 100644 index 0000000000..871b218c03 --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/pipeline_parallel.txt @@ -0,0 +1,9 @@ +class ExperimentPipeline(Pipeline): + """Experiment pipeline.""" + + def _setup_runners(self, args: dict) -> list[Runner]: + """Setup the runners for the pipeline.""" + return [ + ParallelRunner(TrainJobGenerator(), n_jobs=args["train"]["experiments"]), + SerialRunner(SignificanceJobGenerator()), + ] diff --git a/docs/source/snippets/pipelines/dummy/pipeline_serial.txt b/docs/source/snippets/pipelines/dummy/pipeline_serial.txt new file mode 100644 index 0000000000..dfbd62ee2d --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/pipeline_serial.txt @@ -0,0 +1,9 @@ +class ExperimentPipeline(Pipeline): + """Experiment pipeline.""" + + def _setup_runners(self, args: dict) -> list[Runner]: + """Setup the runners for the pipeline.""" + return [ + SerialRunner(TrainJobGenerator()), + SerialRunner(SignificanceJobGenerator()), + ] diff --git a/docs/source/snippets/pipelines/dummy/significance_job.txt b/docs/source/snippets/pipelines/dummy/significance_job.txt new file mode 100644 index 0000000000..592e332368 --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/significance_job.txt @@ -0,0 +1,48 @@ +class SignificantFeatureJob(Job): + """Compute the most significant feature.""" + + name = "significant_feature" + + def __init__(self, metrics_df: pd.DataFrame) -> None: + self.metrics_df = metrics_df 
+ + def run(self, task_id: int | None = None) -> tuple[Explanation, pd.DataFrame]: + """Fit a RandomForestRegressor to compute the shapely values.""" + x_features = self.metrics_df.drop(columns=["score"]) + target = self.metrics_df["score"] + + # Convert categorical features to one-hot encoding + categorical_features = ["backbone", "stride"] + encoder = OneHotEncoder(sparse_output=False) + x_encoded = pd.DataFrame(encoder.fit_transform(x_features[categorical_features])) + x_encoded.columns = encoder.get_feature_names_out(categorical_features) + + # Combine encoded categorical features with the rest of the data + x_features = x_features.drop(columns=categorical_features) + x_features = pd.concat([x_features, x_encoded], axis=1) + + # We don't split the data as the random forest is a proxy to compute the shapely values + # Train the model + model = RandomForestRegressor() + model.fit(X=x_features, y=target) + + # Initialize the SHAP explainer + explainer = shap.Explainer(model, x_features) + + # Calculate SHAP values + shap_values = explainer(x_features) + return shap_values, x_features + + @staticmethod + def collect(results: list[tuple[Explanation, pd.DataFrame]]) -> Explanation: + """We only need to run this once.""" + return results[0] + + @staticmethod + def save(results: tuple[Explanation, pd.DataFrame]) -> None: + """Save the results in a plot.""" + shapely_values, features = results + shap.summary_plot(shapely_values, features, show=False, plot_type="violin") + file_path = Path("runs") / SignificantFeatureJob.name + file_path.mkdir(parents=True, exist_ok=True) + plt.savefig(file_path / "summary_plot.png") diff --git a/docs/source/snippets/pipelines/dummy/significance_job_generator.txt b/docs/source/snippets/pipelines/dummy/significance_job_generator.txt new file mode 100644 index 0000000000..4d8c3e51fb --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/significance_job_generator.txt @@ -0,0 +1,16 @@ +class SignificanceJobGenerator(JobGenerator): + 
"""Generate SignificantFeatureJob.""" + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: pd.DataFrame | None = None, + ) -> Generator[SignificantFeatureJob, None, None]: + """Generate Jobs via random selection.""" + assert prev_stage_result is not None, "Previous stage result is required" + yield SignificantFeatureJob(metrics_df=prev_stage_result) + + @property + def job_class(self) -> type: + """Return the job class.""" + return SignificantFeatureJob diff --git a/docs/source/snippets/pipelines/dummy/src_dir_structure.txt b/docs/source/snippets/pipelines/dummy/src_dir_structure.txt new file mode 100644 index 0000000000..d40f29bd7f --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/src_dir_structure.txt @@ -0,0 +1,7 @@ +src/anomalib/pipelines/experiment_pipeline +β”œβ”€β”€ __init__.py +β”œβ”€β”€ pipeline.py +β”œβ”€β”€ significance_job_generator.py +β”œβ”€β”€ significance_job.py +β”œβ”€β”€ train_job_generator.py +└── train_job.py diff --git a/docs/source/snippets/pipelines/dummy/tools_dir_structure.txt b/docs/source/snippets/pipelines/dummy/tools_dir_structure.txt new file mode 100644 index 0000000000..4b87d1ab6a --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/tools_dir_structure.txt @@ -0,0 +1,4 @@ +tools/experimental/experiment +β”œβ”€β”€ config.yaml +β”œβ”€β”€ experiment.py +└── __init__.py diff --git a/docs/source/snippets/pipelines/dummy/train_generator.txt b/docs/source/snippets/pipelines/dummy/train_generator.txt new file mode 100644 index 0000000000..06e58ec81a --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/train_generator.txt @@ -0,0 +1,19 @@ +class TrainJobGenerator(JobGenerator): + """Generate TrainJob.""" + + def generate_jobs( + self, + args: dict | None = None, + prev_stage_result: None = None, + ) -> Generator[TrainJob, None, None]: + """Generate Jobs via random selection.""" + for _ in range(args["experiments"]): + lr: float = np.random.uniform(args["lr"][0], args["lr"][1]) + backbone: str = 
np.random.choice(args["backbone"]) + stride: int = np.random.choice(args["stride"]) + yield TrainJob(lr=lr, backbone=backbone, stride=stride) + + @property + def job_class(self) -> type: + """Return the job class.""" + return TrainJob diff --git a/docs/source/snippets/pipelines/dummy/train_job.txt b/docs/source/snippets/pipelines/dummy/train_job.txt new file mode 100644 index 0000000000..d980c08ee2 --- /dev/null +++ b/docs/source/snippets/pipelines/dummy/train_job.txt @@ -0,0 +1,31 @@ +class TrainJob(Job): + name = "train" + + def __init__(self, lr: float, backbone: str, stride: int): + self.lr = lr + self.backbone = backbone + self.stride = stride + + def run(self, task_id: int | None = None) -> dict: + print(f"Training with lr: {self.lr}, backbone: {self.backbone}, stride: {self.stride}") + time.sleep(2) + score = np.random.uniform(0.7, 1.0) + return {"lr": self.lr, "backbone": self.backbone, "stride": self.stride, "score": score} + + @staticmethod + def collect(results: list[dict]) -> pd.DataFrame: + """Collect all individual runs into a dict of lists.""" + output: dict = {} + for key in results[0]: + output[key] = [] + for result in results: + for key, value in result.items(): + output[key].append(value) + return pd.DataFrame(output) + + @staticmethod + def save(results: pd.DataFrame) -> None: + """Save results in a csv file.""" + file_path = Path("runs") / TrainJob.name + file_path.mkdir(parents=True, exist_ok=True) + results.to_csv(file_path / "results.csv", index=False)