Add min-max normalization (#53)

* add min-max normalization * persist min and max observed values of validation set * pylint * license headers * removing print statement * revert default category * improve visualizer * switch to mean normalization * disable normalization for dfm * Revert "disable normalization for dfm" This reverts commit ef6f6dd. * limit values between 0 and 1 * move normalization functionality to functions * skip dfm test * rename cdf normalization * rename normalization files
openvinotoolkit · Jan 5, 2022 · b169c4d · b169c4d
1 parent 29eb504
commit b169c4d
Show file tree

Hide file tree

Showing 18 changed files with 327 additions and 58 deletions.
diff --git a/anomalib/core/callbacks/__init__.py b/anomalib/core/callbacks/__init__.py
@@ -7,9 +7,10 @@
 from omegaconf import DictConfig, ListConfig
 from pytorch_lightning.callbacks import Callback, ModelCheckpoint
 
+from .cdf_normalization import CdfNormalizationCallback
 from .compress import CompressModelCallback
+from .min_max_normalization import MinMaxNormalizationCallback
 from .model_loader import LoadModelCallback
-from .normalization import AnomalyScoreNormalizationCallback
 from .save_to_csv import SaveToCSVCallback
 from .timer import TimerCallback
 from .visualizer_callback import VisualizerCallback
@@ -51,17 +52,22 @@ def get_callbacks(config: Union[ListConfig, DictConfig]) -> List[Callback]:
         load_model = LoadModelCallback(os.path.join(config.project.path, config.model.weight_file))
         callbacks.append(load_model)
 
-    if "normalize_scores" in config.model.keys() and config.model.normalize_scores:
-        if config.model.name in ["padim", "stfpm"]:
-            if not config.optimization.nncf.apply:
-                callbacks.append(AnomalyScoreNormalizationCallback())
+    if "normalization_method" in config.model.keys() and not config.model.normalization_method == "none":
+        if config.model.normalization_method == "cdf":
+            if config.model.name in ["padim", "stfpm"]:
+                if not config.optimization.nncf.apply:
+                    callbacks.append(CdfNormalizationCallback())
+                else:
+                    raise NotImplementedError("CDF Score Normalization is currently not compatible with NNCF.")
             else:
-                raise NotImplementedError("Score Normalization is currently not compatible with NNCF.")
+                raise NotImplementedError("Score Normalization is currently supported for PADIM and STFPM only.")
+        elif config.model.normalization_method == "min_max":
+            callbacks.append(MinMaxNormalizationCallback())
         else:
-            raise NotImplementedError("Score Normalization is currently supported for PADIM and STFPM only.")
+            raise ValueError(f"Normalization method not recognized: {config.model.normalization_method}")
 
     if not config.project.log_images_to == []:
-        callbacks.append(VisualizerCallback(inputs_are_normalized=config.model.normalize_scores))
+        callbacks.append(VisualizerCallback(inputs_are_normalized=not config.model.normalization_method == "none"))
 
     if "optimization" in config.keys():
         if config.optimization.nncf.apply:

diff --git a/anomalib/core/callbacks/normalization.py → anomalib/core/callbacks/cdf_normalization.py b/anomalib/core/callbacks/normalization.py → anomalib/core/callbacks/cdf_normalization.py
@@ -2,15 +2,15 @@
 from typing import Any, Dict, Optional
 
 import pytorch_lightning as pl
-import torch
 from pytorch_lightning import Callback, Trainer
 from pytorch_lightning.utilities.types import STEP_OUTPUT
-from torch.distributions import LogNormal, Normal
+from torch.distributions import LogNormal
 
 from anomalib.models import get_model
+from anomalib.utils.normalization.cdf import normalize, standardize
 
 
-class AnomalyScoreNormalizationCallback(Callback):
+class CdfNormalizationCallback(Callback):
     """Callback that standardizes the image-level and pixel-level anomaly scores."""
 
     def __init__(self):
@@ -43,7 +43,7 @@ def on_validation_batch_end(
         _dataloader_idx: int,
     ) -> None:
         """Called when the validation batch ends, standardizes the predicted scores and anomaly maps."""
-        self._standardize(outputs, pl_module)
+        self._standardize_batch(outputs, pl_module)
 
     def on_test_batch_end(
         self,
@@ -55,8 +55,8 @@ def on_test_batch_end(
         _dataloader_idx: int,
     ) -> None:
         """Called when the test batch ends, normalizes the predicted scores and anomaly maps."""
-        self._standardize(outputs, pl_module)
-        self._normalize(outputs, pl_module)
+        self._standardize_batch(outputs, pl_module)
+        self._normalize_batch(outputs, pl_module)
 
     def on_predict_batch_end(
         self,
@@ -68,8 +68,8 @@ def on_predict_batch_end(
         _dataloader_idx: int,
     ) -> None:
         """Called when the predict batch ends, normalizes the predicted scores and anomaly maps."""
-        self._standardize(outputs, pl_module)
-        self._normalize(outputs, pl_module)
+        self._standardize_batch(outputs, pl_module)
+        self._normalize_batch(outputs, pl_module)
         outputs["pred_labels"] = outputs["pred_scores"] >= 0.5
 
     def _collect_stats(self, trainer, pl_module):
@@ -97,23 +97,17 @@ def _create_inference_model(pl_module):
         new_model.load_state_dict(pl_module.state_dict())
         return new_model
 
-    def _standardize(self, outputs: STEP_OUTPUT, pl_module) -> None:
-        """Standardize the predicted scores and anomaly maps to the z-domain."""
+    @staticmethod
+    def _standardize_batch(outputs: STEP_OUTPUT, pl_module) -> None:
         stats = pl_module.training_distribution.to(outputs["pred_scores"].device)
-
-        outputs["pred_scores"] = torch.log(outputs["pred_scores"])
-        outputs["pred_scores"] = (outputs["pred_scores"] - stats.image_mean) / stats.image_std
+        outputs["pred_scores"] = standardize(outputs["pred_scores"], stats.image_mean, stats.image_std)
         if "anomaly_maps" in outputs.keys():
-            outputs["anomaly_maps"] = (torch.log(outputs["anomaly_maps"]) - stats.pixel_mean) / stats.pixel_std
-            outputs["anomaly_maps"] -= (stats.image_mean - stats.pixel_mean) / stats.pixel_std
+            outputs["anomaly_maps"] = standardize(
+                outputs["anomaly_maps"], stats.pixel_mean, stats.pixel_std, center_at=stats.image_mean
+            )
 
-    def _normalize(self, outputs: STEP_OUTPUT, pl_module: pl.LightningModule) -> None:
-        """Normalize the predicted scores and anomaly maps by first standardizing and then computing the CDF."""
-        device = outputs["pred_scores"].device
-        image_threshold = pl_module.image_threshold.value.cpu()
-        pixel_threshold = pl_module.pixel_threshold.value.cpu()
-
-        norm = Normal(torch.Tensor([0]), torch.Tensor([1]))
-        outputs["pred_scores"] = norm.cdf(outputs["pred_scores"].cpu() - image_threshold).to(device)
+    @staticmethod
+    def _normalize_batch(outputs: STEP_OUTPUT, pl_module: pl.LightningModule) -> None:
+        outputs["pred_scores"] = normalize(outputs["pred_scores"], pl_module.image_threshold.value)
         if "anomaly_maps" in outputs.keys():
-            outputs["anomaly_maps"] = norm.cdf(outputs["anomaly_maps"].cpu() - pixel_threshold).to(device)
+            outputs["anomaly_maps"] = normalize(outputs["anomaly_maps"], pl_module.pixel_threshold.value)
diff --git a/anomalib/core/callbacks/min_max_normalization.py b/anomalib/core/callbacks/min_max_normalization.py
@@ -0,0 +1,83 @@
+"""Anomaly Score Normalization Callback that uses min-max normalization."""
+
+# Copyright (C) 2020 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+
+from typing import Any, Dict
+
+import pytorch_lightning as pl
+from pytorch_lightning import Callback
+from pytorch_lightning.utilities.types import STEP_OUTPUT
+
+from anomalib.utils.normalization.min_max import normalize
+
+
+class MinMaxNormalizationCallback(Callback):
+    """Callback that normalizes the image-level and pixel-level anomaly scores using min-max normalization."""
+
+    def on_test_start(self, _trainer: pl.Trainer, pl_module: pl.LightningModule) -> None:
+        """Called when the test begins, sets the image-level and pixel-level F1 thresholds to 0.5."""
+        pl_module.image_metrics.F1.threshold = 0.5
+        pl_module.pixel_metrics.F1.threshold = 0.5
+
+    def on_validation_batch_end(
+        self,
+        _trainer: pl.Trainer,
+        pl_module: pl.LightningModule,
+        outputs: STEP_OUTPUT,
+        _batch: Any,
+        _batch_idx: int,
+        _dataloader_idx: int,
+    ) -> None:
+        """Called when the validation batch ends, updates min/max from anomaly maps if present, else pred scores."""
+        if "anomaly_maps" in outputs.keys():
+            pl_module.min_max(outputs["anomaly_maps"])
+        else:
+            pl_module.min_max(outputs["pred_scores"])
+
+    def on_test_batch_end(
+        self,
+        _trainer: pl.Trainer,
+        pl_module: pl.LightningModule,
+        outputs: STEP_OUTPUT,
+        _batch: Any,
+        _batch_idx: int,
+        _dataloader_idx: int,
+    ) -> None:
+        """Called when the test batch ends, normalizes the predicted scores and anomaly maps."""
+        self._normalize_batch(outputs, pl_module)
+
+    def on_predict_batch_end(
+        self,
+        _trainer: pl.Trainer,
+        pl_module: pl.LightningModule,
+        outputs: Dict,
+        _batch: Any,
+        _batch_idx: int,
+        _dataloader_idx: int,
+    ) -> None:
+        """Called when the predict batch ends, normalizes the predicted scores and anomaly maps."""
+        self._normalize_batch(outputs, pl_module)
+
+    @staticmethod
+    def _normalize_batch(outputs, pl_module):
+        """Overwrite pred scores (and anomaly maps, if present) in outputs with their normalized values."""
+        stats = pl_module.min_max
+        outputs["pred_scores"] = normalize(
+            outputs["pred_scores"], pl_module.image_threshold.value, stats.min, stats.max
+        )
+        if "anomaly_maps" in outputs.keys():
+            outputs["anomaly_maps"] = normalize(
+                outputs["anomaly_maps"], pl_module.pixel_threshold.value, stats.min, stats.max
+            )
diff --git a/anomalib/core/callbacks/visualizer_callback.py b/anomalib/core/callbacks/visualizer_callback.py
@@ -92,11 +92,10 @@ def on_test_batch_end(
         assert outputs is not None
 
         if self.inputs_are_normalized:
-            threshold = 0.5
             normalize = False  # anomaly maps are already normalized
         else:
-            threshold = pl_module.pixel_threshold.value.item()
             normalize = True  # raw anomaly maps. Still need to normalize
+        threshold = pl_module.pixel_metrics.F1.threshold
 
         for (filename, image, true_mask, anomaly_map) in zip(
             outputs["image_path"], outputs["image"], outputs["mask"], outputs["anomaly_maps"]

diff --git a/anomalib/core/metrics/__init__.py b/anomalib/core/metrics/__init__.py
@@ -2,6 +2,7 @@
 from .adaptive_threshold import AdaptiveThreshold
 from .anomaly_score_distribution import AnomalyScoreDistribution
 from .auroc import AUROC
+from .min_max import MinMax
 from .optimal_f1 import OptimalF1
 
-__all__ = ["AUROC", "OptimalF1", "AdaptiveThreshold", "AnomalyScoreDistribution"]
+__all__ = ["AUROC", "OptimalF1", "AdaptiveThreshold", "AnomalyScoreDistribution", "MinMax"]
diff --git a/anomalib/core/metrics/min_max.py b/anomalib/core/metrics/min_max.py
@@ -0,0 +1,43 @@
+"""Module that tracks the min and max values of the observations in each batch."""
+
+# Copyright (C) 2020 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+
+from typing import Tuple
+
+import torch
+from torch import Tensor
+from torchmetrics import Metric
+
+
+class MinMax(Metric):
+    """Track the running min and max of all observed values, updated with each batch."""
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.add_state("min", torch.tensor(float("inf")), persistent=True)  # pylint: disable=not-callable
+        self.add_state("max", torch.tensor(float("-inf")), persistent=True)  # pylint: disable=not-callable
+
+        self.min = torch.tensor(float("inf"))  # pylint: disable=not-callable
+        self.max = torch.tensor(float("-inf"))  # pylint: disable=not-callable
+
+    # pylint: disable=arguments-differ
+    def update(self, predictions: Tensor) -> None:  # type: ignore
+        """Fold the smallest and largest value of predictions into the running min and max."""
+        self.max = torch.max(self.max, torch.max(predictions))
+        self.min = torch.min(self.min, torch.min(predictions))
+
+    def compute(self) -> Tuple[Tensor, Tensor]:
+        """Return the running min and max as a (min, max) tuple."""
+        return self.min, self.max
diff --git a/anomalib/core/model/anomaly_module.py b/anomalib/core/model/anomaly_module.py
@@ -22,7 +22,12 @@
 from torch import Tensor, nn
 from torchmetrics import F1, MetricCollection
 
-from anomalib.core.metrics import AUROC, AdaptiveThreshold, AnomalyScoreDistribution
+from anomalib.core.metrics import (
+    AUROC,
+    AdaptiveThreshold,
+    AnomalyScoreDistribution,
+    MinMax,
+)
 
 
 class AnomalyModule(pl.LightningModule):
@@ -47,6 +52,7 @@ def __init__(self, params: Union[DictConfig, ListConfig]):
         self.pixel_threshold = AdaptiveThreshold(self.hparams.model.threshold.pixel_default)
 
         self.training_distribution = AnomalyScoreDistribution()
+        self.min_max = MinMax()
 
         self.model: nn.Module
 
@@ -141,8 +147,8 @@ def _compute_adaptive_threshold(self, outputs):
         else:
             self.pixel_threshold.value = self.image_threshold.value
 
-        self.image_metrics.F1.threshold = self.image_threshold.value
-        self.pixel_metrics.F1.threshold = self.pixel_threshold.value
+        self.image_metrics.F1.threshold = self.image_threshold.value.item()
+        self.pixel_metrics.F1.threshold = self.pixel_threshold.value.item()
 
     def _collect_outputs(self, image_metric, pixel_metric, outputs):
         for output in outputs:

diff --git a/anomalib/core/model/inference.py b/anomalib/core/model/inference.py
@@ -23,13 +23,15 @@
 import torch
 from omegaconf import DictConfig, ListConfig
 from openvino.inference_engine import IECore  # pylint: disable=no-name-in-module
-from scipy.stats import norm
 from torch import Tensor, nn
 
 from anomalib.core.model import AnomalyModule
 from anomalib.data.transforms.pre_process import PreProcessor
 from anomalib.data.utils import read_image
 from anomalib.models import get_model
+from anomalib.utils.normalization.cdf import normalize as normalize_cdf
+from anomalib.utils.normalization.cdf import standardize
+from anomalib.utils.normalization.min_max import normalize as normalize_min_max
 from anomalib.utils.post_process import superimpose_anomaly_map
 
 
@@ -287,18 +289,24 @@ def post_process(self, predictions: np.ndarray, meta_data: Optional[Dict] = None
         anomaly_map = predictions.squeeze()
         pred_score = anomaly_map.reshape(-1).max()
 
+        # min max normalization
+        if "min" in meta_data and "max" in meta_data:
+            anomaly_map = normalize_min_max(
+                anomaly_map, meta_data["pixel_threshold"], meta_data["min"], meta_data["max"]
+            )
+            pred_score = normalize_min_max(pred_score, meta_data["image_threshold"], meta_data["min"], meta_data["max"])
+
         # standardize pixel scores
         if "pixel_mean" in meta_data.keys() and "pixel_std" in meta_data.keys():
-            anomaly_map = np.log(anomaly_map)
-            anomaly_map = (anomaly_map - meta_data["pixel_mean"]) / meta_data["pixel_std"]
-            anomaly_map -= (meta_data["image_mean"] - meta_data["pixel_mean"]) / meta_data["pixel_std"]
-            anomaly_map = norm.cdf(anomaly_map - meta_data["pixel_threshold"])
+            anomaly_map = standardize(
+                anomaly_map, meta_data["pixel_mean"], meta_data["pixel_std"], center_at=meta_data["image_mean"]
+            )
+            anomaly_map = normalize_cdf(anomaly_map, meta_data["pixel_threshold"])
 
         # standardize image scores
         if "image_mean" in meta_data.keys() and "image_std" in meta_data.keys():
-            pred_score = np.log(pred_score)
-            pred_score = (pred_score - meta_data["image_mean"]) / meta_data["image_std"]
-            pred_score = norm.cdf(pred_score - meta_data["image_threshold"])
+            pred_score = standardize(pred_score, meta_data["image_mean"], meta_data["image_std"])
+            pred_score = normalize_cdf(pred_score, meta_data["image_threshold"])
 
         if "image_shape" in meta_data and anomaly_map.shape != meta_data["image_shape"]:
             anomaly_map = cv2.resize(anomaly_map, meta_data["image_shape"])

diff --git a/anomalib/models/dfkde/config.yaml b/anomalib/models/dfkde/config.yaml
@@ -18,7 +18,7 @@ model:
   confidence_threshold: 0.5
   pre_processing: scale
   n_components: 16
-  normalize_scores: false # currently not supported for this model
+  normalization_method: min_max # options: [none, min_max, cdf]
   threshold:
     image_default: 0
     adaptive: true

diff --git a/anomalib/models/dfm/config.yaml b/anomalib/models/dfm/config.yaml
@@ -17,7 +17,7 @@ model:
   pca_level: 0.97
   score_type: fre # nll: for Gaussian modeling, fre: pca feature reconstruction error
   project_path: ./results
-  normalize_scores: false # currently not supported for this model
+  normalization_method: min_max # options: [none, min_max, cdf]
   threshold:
     image_default: 0
     adaptive: true

diff --git a/anomalib/models/padim/config.yaml b/anomalib/models/padim/config.yaml
@@ -26,7 +26,7 @@ model:
     - layer2
     - layer3
   metric: auc
-  normalize_scores: true
+  normalization_method: min_max # options: [none, min_max, cdf]
   threshold:
     image_default: 3
     pixel_default: 3

diff --git a/anomalib/models/patchcore/config.yaml b/anomalib/models/patchcore/config.yaml
@@ -28,7 +28,7 @@ model:
   num_neighbors: 9
   metric: auc
   weight_file: weights/model.ckpt
-  normalize_scores: false # currently not supported for this model
+  normalization_method: min_max # options: [none, min_max, cdf]
   threshold:
     image_default: 0
     pixel_default: 0