openvinotoolkit · samet-akcay · Mar 31, 2022 · Mar 30, 2022 · Mar 30, 2022
diff --git a/README.md b/README.md
@@ -65,10 +65,10 @@ pip install -e .
 ## Training
 
 By default [`python tools/train.py`](https://gitlab-icv.inn.intel.com/algo_rnd_team/anomaly/-/blob/development/train.py)
-runs [PADIM](https://arxiv.org/abs/2011.08785) model [MVTec](https://www.mvtec.com/company/research/datasets/mvtec-ad) `leather` dataset.
+runs the [PADIM](https://arxiv.org/abs/2011.08785) model on the `leather` category from the [MVTec AD](https://www.mvtec.com/company/research/datasets/mvtec-ad) [(CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/) dataset.
 
 ```bash
-python tools/train.py    # Train PADIM on MVTec leather
+python tools/train.py    # Train PADIM on MVTec AD leather
 ```
 
 Training a model on a specific dataset and category requires further configuration. Each model has its own configuration
@@ -181,9 +181,11 @@ python tools/inference.py \
 ___
 
 ## Datasets
-The `development` branch supports MVTec and BeanTech datasets.
+`anomalib` supports the MVTec AD [(CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/) and BeanTech [(CC BY-SA 4.0)](https://creativecommons.org/licenses/by-sa/4.0/legalcode) datasets for benchmarking, and `folder` for custom dataset training/inference.
 
-### [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+### [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+The MVTec AD dataset is one of the main benchmarks for anomaly detection, and is released under the
+Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License [(CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).
 
 ### Image-Level AUC
 

diff --git a/anomalib/data/btech.py b/anomalib/data/btech.py
@@ -155,7 +155,7 @@ def __init__(
         seed: int = 0,
         create_validation_set: bool = False,
     ) -> None:
-        """Mvtec Dataset class.
+        """BTech Dataset class.
 
         Args:
             root: Path to the BTech dataset

diff --git a/anomalib/data/folder.py b/anomalib/data/folder.py
@@ -80,8 +80,7 @@ def make_dataset(
             Defaults to 0.2.
         seed (int, optional): Random seed to ensure reproducibility when splitting. Defaults to 0.
         create_validation_set (bool, optional):Boolean to create a validation set from the test set.
-            MVTec dataset does not contain a validation set. Those wanting to create a validation set
-            could set this flag to ``True``.
+            Those wanting to create a validation set could set this flag to ``True``.
         extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the
             directory.
 
@@ -185,8 +184,7 @@ def __init__(
             task (Optional[str], optional): Task type. (classification or segmentation) Defaults to None.
             seed (int, optional): Random seed to ensure reproducibility when splitting. Defaults to 0.
             create_validation_set (bool, optional):Boolean to create a validation set from the test set.
-                MVTec dataset does not contain a validation set. Those wanting to create a validation set
-                could set this flag to ``True``.
+                Those wanting to create a validation set could set this flag to ``True``.
 
         Raises:
             ValueError: When task is set to classification and `mask_dir` is provided. When `mask_dir` is
@@ -248,7 +246,7 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
             if self.task == "segmentation":
                 mask_path = self.samples.mask_path[index]
 
-                # Only Anomalous (1) images has masks in MVTec dataset.
+                # Only Anomalous (1) images have masks in MVTec AD dataset.
                 # Therefore, create empty mask for Normal (0) images.
                 if label_index == 0:
                     mask = np.zeros(shape=image.shape[:2])
@@ -310,8 +308,7 @@ def __init__(
             transform_config (Optional[Union[str, A.Compose]], optional): Config for pre-processing.
                 Defaults to None.
             create_validation_set (bool, optional):Boolean to create a validation set from the test set.
-                MVTec dataset does not contain a validation set. Those wanting to create a validation set
-                could set this flag to ``True``.
+                Those wanting to create a validation set could set this flag to ``True``.
 
         Examples:
             Assume that we use Folder Dataset for the MVTec/bottle/broken_large category. We would do:
@@ -333,7 +330,7 @@ def __init__(
 
             We could also create a Folder DataModule for datasets containing mask annotations.
             The dataset expects that mask annotation filenames must be same as the original filename.
-            To this end, we modified mask filenames in MVTec bottle category.
+            To this end, we modified mask filenames in MVTec AD bottle category.
             Now we could try folder data module using the mvtec bottle broken large category
             >>> datamodule = FolderDataModule(
             ...     root="./datasets/bottle/test",

diff --git a/anomalib/data/mvtec.py b/anomalib/data/mvtec.py
@@ -1,10 +1,27 @@
-"""MVTec Dataset.
-
-MVTec This script contains PyTorch Dataset, Dataloader and PyTorch
-Lightning DataModule for the MVTec dataset.
-
-If the dataset is not on the file system, the script downloads and
-extracts the dataset and create PyTorch data objects.
+"""MVTec AD Dataset (CC BY-NC-SA 4.0).
+
+Description:
+    This script contains PyTorch Dataset, Dataloader and PyTorch
+    Lightning DataModule for the MVTec AD dataset.
+
+    If the dataset is not on the file system, the script downloads and
+    extracts the dataset and creates PyTorch data objects.
+
+License:
+    MVTec AD dataset is released under the Creative Commons
+    Attribution-NonCommercial-ShareAlike 4.0 International License
+    (CC BY-NC-SA 4.0) (https://creativecommons.org/licenses/by-nc-sa/4.0/).
+
+Reference:
+    - Paul Bergmann, Kilian Batzner, Michael Fauser, David Sattlegger, Carsten Steger:
+      The MVTec Anomaly Detection Dataset: A Comprehensive Real-World Dataset for
+      Unsupervised Anomaly Detection; in: International Journal of Computer Vision
+      129(4):1038-1059, 2021, DOI: 10.1007/s11263-020-01400-4.
+
+    - Paul Bergmann, Michael Fauser, David Sattlegger, Carsten Steger: MVTec AD —
+      A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection;
+      in: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR),
+      9584-9592, 2019, DOI: 10.1109/CVPR.2019.00982.
 """
 
 # Copyright (C) 2020 Intel Corporation
@@ -47,7 +64,7 @@
 )
 from anomalib.pre_processing import PreProcessor
 
-logger = logging.getLogger(name="Dataset: MVTec")
+logger = logging.getLogger(name="Dataset: MVTec AD")
 logger.setLevel(logging.DEBUG)
 
 
@@ -58,7 +75,7 @@ def make_mvtec_dataset(
     seed: int = 0,
     create_validation_set: bool = False,
 ) -> DataFrame:
-    """Create MVTec samples by parsing the MVTec data file structure.
+    """Create MVTec AD samples by parsing the MVTec AD data file structure.
 
     The files are expected to follow the structure:
         path/to/dataset/split/category/image_filename.png
@@ -79,11 +96,11 @@ def make_mvtec_dataset(
             Defaults to 0.1.
         seed (int, optional): Random seed to ensure reproducibility when splitting. Defaults to 0.
         create_validation_set (bool, optional): Boolean to create a validation set from the test set.
-            MVTec dataset does not contain a validation set. Those wanting to create a validation set
+            MVTec AD dataset does not contain a validation set. Those wanting to create a validation set
             could set this flag to ``True``.
 
     Example:
-        The following example shows how to get training samples from MVTec bottle category:
+        The following example shows how to get training samples from MVTec AD bottle category:
 
         >>> root = Path('./MVTec')
         >>> category = 'bottle'
@@ -149,7 +166,7 @@ def make_mvtec_dataset(
 
 
 class MVTec(VisionDataset):
-    """MVTec PyTorch Dataset."""
+    """MVTec AD PyTorch Dataset."""
 
     def __init__(
         self,
@@ -161,11 +178,11 @@ def __init__(
         seed: int = 0,
         create_validation_set: bool = False,
     ) -> None:
-        """Mvtec Dataset class.
+        """MVTec AD Dataset class.
 
         Args:
-            root: Path to the MVTec dataset
-            category: Name of the MVTec category.
+            root: Path to the MVTec AD dataset
+            category: Name of the MVTec AD category.
             pre_process: List of pre_processing object containing albumentation compose.
             split: 'train', 'val' or 'test'
             task: ``classification`` or ``segmentation``
@@ -248,7 +265,7 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
             if self.task == "segmentation":
                 mask_path = self.samples.mask_path[index]
 
-                # Only Anomalous (1) images has masks in MVTec dataset.
+                # Only Anomalous (1) images have masks in MVTec AD dataset.
                 # Therefore, create empty mask for Normal (0) images.
                 if label_index == 0:
                     mask = np.zeros(shape=image.shape[:2])
@@ -265,7 +282,7 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
 
 
 class MVTecDataModule(LightningDataModule):
-    """MVTec Lightning Data Module."""
+    """MVTec AD Lightning Data Module."""
 
     def __init__(
         self,
@@ -280,11 +297,11 @@ def __init__(
         seed: int = 0,
         create_validation_set: bool = False,
     ) -> None:
-        """Mvtec Lightning Data Module.
+        """MVTec AD Lightning Data Module.
 
         Args:
-            root: Path to the MVTec dataset
-            category: Name of the MVTec category.
+            root: Path to the MVTec AD dataset
+            category: Name of the MVTec AD category.
             image_size: Variable to which image is resized.
             train_batch_size: Training batch size.
             test_batch_size: Testing batch size.
@@ -350,7 +367,7 @@ def prepare_data(self) -> None:
             dataset_name = "mvtec_anomaly_detection.tar.xz"
 
             logging.info("Downloading the dataset.")
-            with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc="MVTec") as progress_bar:
+            with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc="MVTec AD") as progress_bar:
                 urlretrieve(
                     url=f"ftp://guest:GU.205dldo@ftp.softronics.ch/mvtec_anomaly_detection/{dataset_name}",
                     filename=self.root / dataset_name,

diff --git a/anomalib/data/utils/split.py b/anomalib/data/utils/split.py
@@ -43,7 +43,7 @@ def split_normal_images_in_train_set(
         samples (DataFrame): Dataframe containing dataset info such as filenames, splits etc.
         split_ratio (float, optional): Train-Test normal image split ratio. Defaults to 0.1.
         seed (int, optional): Random seed to ensure reproducibility. Defaults to 0.
-        normal_label (str): Name of the normal label. For MVTec, for instance, this is normal_label.
+        normal_label (str): Name of the normal label. For MVTec AD, for instance, this is normal_label.
 
     Returns:
         DataFrame: Output dataframe where the part of the training set is assigned to test set.
@@ -71,7 +71,7 @@ def create_validation_set_from_test_set(samples: DataFrame, seed: int = 0, norma
     Args:
         samples (DataFrame): Dataframe containing dataset info such as filenames, splits etc.
         seed (int, optional): Random seed to ensure reproducibility. Defaults to 0.
-        normal_label (str): Name of the normal label. For MVTec, for instance, this is normal_label.
+        normal_label (str): Name of the normal label. For MVTec AD, for instance, this is normal_label.
     """
 
     if seed > 0:

diff --git a/anomalib/models/cflow/README.md b/anomalib/models/cflow/README.md
@@ -20,7 +20,7 @@ CFLOW model is based on a conditional normalizing flow framework adopted for ano
 
 All results gathered with seed `42`.
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/anomalib/models/dfkde/README.md b/anomalib/models/dfkde/README.md
@@ -22,7 +22,7 @@ In the anomaly classification stage, the features are first reduced to the first
 
 All results gathered with seed `42`.
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/anomalib/models/dfm/README.md b/anomalib/models/dfm/README.md
@@ -24,7 +24,7 @@ In the anomaly classification stage, class-conditional PCA transformations and G
 
 All results gathered with seed `42`.
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/anomalib/models/ganomaly/README.md b/anomalib/models/ganomaly/README.md
@@ -22,7 +22,7 @@ The key idea here is that, during inference, when an anomalous image is passed t
 
 All results gathered with seed `42`.
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/anomalib/models/padim/README.md b/anomalib/models/padim/README.md
@@ -22,7 +22,7 @@ During inference, Mahalanobis distance is used to score each patch position of t
 
 All results gathered with seed `42`.
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/anomalib/models/patchcore/README.md b/anomalib/models/patchcore/README.md
@@ -22,7 +22,7 @@ During inference this memory bank is coreset subsampled. Coreset subsampling gen
 
 All results gathered with seed `42`.
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/anomalib/models/stfpm/README.md b/anomalib/models/stfpm/README.md
@@ -22,7 +22,7 @@ During inference, the feature pyramids of teacher and student networks are compa
 
 All results gathered with seed `42`.
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/docs/source/guides/getting_started.rst b/docs/source/guides/getting_started.rst
@@ -31,12 +31,12 @@ Training
 By default
 `python tools/train.py <https://gitlab-icv.inn.intel.com/algo_rnd_team/anomaly/blob/samet/stfpm/tools/train.py>`__
 runs `STFPM <https://arxiv.org/pdf/2103.04257.pdf>`__ model
-`MVTec <https://www.mvtec.com/company/research/datasets/mvtec-ad>`__
+on the `MVTec AD <https://www.mvtec.com/company/research/datasets/mvtec-ad>`__
 ``leather`` dataset.
 
 ::
 
-    python tools/train.py    # Train STFPM on MVTec leather
+    python tools/train.py    # Train STFPM on MVTec AD leather
 
 Training a model on a specific dataset and category requires further
 configuration. Each model has its own configuration file,

diff --git a/docs/source/research/benchmark.md b/docs/source/research/benchmark.md
@@ -1,6 +1,6 @@
 # Benchmark
 
-## [MVTec Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
+## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad)
 
 ### Image-Level AUC
 

diff --git a/tests/helpers/dataset.py b/tests/helpers/dataset.py
@@ -96,7 +96,7 @@ def __init__(
         seed: int = 0,
     ) -> None:
         """Creates a context for Generating Dummy Dataset. Useful for wrapping test functions.
-        NOTE: for MVTec dataset it does not return a category.
+        NOTE: for MVTec AD dataset it does not return a category.
         It is adviced to use a default parameter in the function
 
         Args:
@@ -107,8 +107,8 @@ def __init__(
             max_size (Optional[int], optional): Maximum size of the test shapes. Defaults to 10.
             train_shapes (List[str], optional): List of good shapes. Defaults to ["circle", "rectangle"].
             test_shapes (List[str], optional): List of anomalous shapes. Defaults to ["triangle", "ellipse"].
-            path (Union[str, Path], optional): Path to MVTec dataset. Defaults to "./datasets/MVTec".
-            use_mvtec (bool, optional): Use MVTec dataset or dummy dataset. Defaults to False.
+            path (Union[str, Path], optional): Path to MVTec AD dataset. Defaults to "./datasets/MVTec".
+            use_mvtec (bool, optional): Use MVTec AD dataset or dummy dataset. Defaults to False.
             seed (int, optional): Fixes seed if any number greater than 0 is provided. 0 means no seed. Defaults to 0.
 
         Example:
@@ -130,7 +130,7 @@ def __init__(
     def __call__(self, func):
         @wraps(func)
         def inner(*args, **kwds):
-            # If true, will use MVTech dataset for testing.
+            # If true, will use MVTec AD dataset for testing.
             # Useful for nightly builds
             if self.use_mvtec:
                 return func(*args, path=self.path, **kwds)
@@ -191,7 +191,7 @@ def __init__(
 
     def _generate_dataset(self):
         """Generates dummy dataset in a temporary directory using the same
-        convention as MVTec."""
+        convention as MVTec AD."""
         # create train images
         train_path = os.path.join(self.root_dir, "shapes", "train", "good")
         os.makedirs(train_path, exist_ok=True)

diff --git a/tests/nightly/models/test_model_nightly.py b/tests/nightly/models/test_model_nightly.py
@@ -1,4 +1,4 @@
-"""Test Models on all MVTec Categories."""
+"""Test Models on all MVTec AD Categories."""
 
 # Copyright (C) 2020 Intel Corporation
 #

diff --git a/tests/pre_merge/datasets/test_dataset.py b/tests/pre_merge/datasets/test_dataset.py
@@ -79,7 +79,7 @@ def data_sample(mvtec_data_module):
 
 
 class TestMVTecDataModule:
-    """Test MVTec Data Module."""
+    """Test MVTec AD Data Module."""
 
     def test_batch_size(self, mvtec_data_module):
         """test_mvtec_datamodule [summary]"""

diff --git a/tools/benchmarking/README.md b/tools/benchmarking/README.md
@@ -10,7 +10,7 @@ Run the train.sh with the same args as the tools/train.py. Refer to [`../README.
 Note: To collect memory read/write numbers, run the script with sudo privileges. Otherwise, those values will be blank.
 
 ```
-sudo -E ./train.sh    # Train STFPM on MVTec leather
+sudo -E ./train.sh    # Train STFPM on MVTec AD leather
 
 sudo -E ./train.sh --model_config_path <path/to/model/config.yaml>