Refactor PreProcessor and fix Visualizer denormalization issue. #570

Merged: 28 commits, Sep 23, 2022

Changes from all commits

Commits (28)
ee1cfce
move sample generation to datamodule instead of dataset
djdameln Sep 9, 2022
ec5199e
move sample generation from init to setup
djdameln Sep 12, 2022
9f0a35e
remove inference stage and add base classes
djdameln Sep 13, 2022
dea176f
replace dataset classes with AnomalibDataset
djdameln Sep 13, 2022
62a04f8
move setup to base class, create samples as class method
djdameln Sep 13, 2022
e91afad
update docstrings
djdameln Sep 13, 2022
df4a805
refactor btech to new format
djdameln Sep 14, 2022
c225a83
allow training with no anomalous data
djdameln Sep 14, 2022
ac0dc8a
remove MVTec name from comment
djdameln Sep 15, 2022
5d90209
raise NotImplementedError in base class
djdameln Sep 15, 2022
c1e6724
allow both png and bmp images for btech
djdameln Sep 15, 2022
2d70d89
use label_index to check if dataset contains anomalous images
djdameln Sep 16, 2022
f5f17db
refactor getitem in dataset class
djdameln Sep 16, 2022
f02065f
use iloc for indexing
djdameln Sep 16, 2022
9cba9da
move dataloader getters to base class
djdameln Sep 16, 2022
5b3e841
refactor to add validate stage in setup
djdameln Sep 16, 2022
98b56ee
Merge branch 'da/refactor-datamodules' of github.com:openvinotoolkit/…
samet-akcay Sep 17, 2022
0d82c5c
Add warning message when there is no config file passed
samet-akcay Sep 20, 2022
4572d84
Merge branch 'main' of github.com:openvinotoolkit/anomalib into fix/s…
samet-akcay Sep 20, 2022
8a815e0
Extract get_transforms and get_height_and_width functions
samet-akcay Sep 21, 2022
764c111
refactor pre-processor and fix visualizer normalization issue
samet-akcay Sep 21, 2022
6fa4756
Revert the new data refactor
samet-akcay Sep 21, 2022
3ebc2d5
rename variable
samet-akcay Sep 21, 2022
e6f0309
Revert the changes not merged yet
samet-akcay Sep 21, 2022
679757b
Fix tests
samet-akcay Sep 21, 2022
91e92db
Fix tests
samet-akcay Sep 21, 2022
98597c3
Address codacy concerns
samet-akcay Sep 22, 2022
07708c2
Merge branch 'main' into fix/sa/custom-data-normalization
samet-akcay Sep 22, 2022
8 changes: 7 additions & 1 deletion anomalib/data/utils/__init__.py
@@ -5,11 +5,17 @@

from .download import DownloadProgressBar, hash_check
from .generators import random_2d_perlin
from .image import generate_output_image_filename, get_image_filenames, read_image
from .image import (
    generate_output_image_filename,
    get_image_filenames,
    get_image_height_and_width,
    read_image,
)

__all__ = [
    "generate_output_image_filename",
    "get_image_filenames",
    "get_image_height_and_width",
    "hash_check",
    "random_2d_perlin",
    "read_image",
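For reference, the newly exported helper can now be imported directly from the data utils package. A minimal sketch mirroring the doctest examples of get_image_height_and_width further down in this diff:

from anomalib.data.utils import get_image_height_and_width

print(get_image_height_and_width(256))         # (256, 256)
print(get_image_height_and_width((224, 320)))  # (224, 320)
print(get_image_height_and_width(None))        # (None, None)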
52 changes: 50 additions & 2 deletions anomalib/data/utils/image.py
@@ -6,7 +6,7 @@
import math
import warnings
from pathlib import Path
from typing import List, Union
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np
@@ -141,7 +141,48 @@ def generate_output_image_filename(input_path: Union[str, Path], output_path: Un
return file_path


def read_image(path: Union[str, Path]) -> np.ndarray:
def get_image_height_and_width(image_size: Optional[Union[int, Tuple]] = None) -> Tuple[Optional[int], Optional[int]]:
"""Get image height and width from ``image_size`` variable.

Args:
image_size (Optional[Union[int, Tuple[int, int]]], optional): Input image size.

Raises:
ValueError: Image size not None, int or tuple.

Examples:
>>> get_image_height_and_width(image_size=256)
(256, 256)

>>> get_image_height_and_width(image_size=(256, 256))
(256, 256)

>>> get_image_height_and_width(image_size=(256, 256, 3))
(256, 256)

>>> get_image_height_and_width(image_size=256.)
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "<string>", line 18, in get_image_height_and_width
ValueError: ``image_size`` could be either int or Tuple[int, int]

Returns:
Tuple[Optional[int], Optional[int]]: A tuple containing image height and width values.
"""
height_and_width: Tuple[Optional[int], Optional[int]]
if isinstance(image_size, int):
height_and_width = (image_size, image_size)
elif isinstance(image_size, tuple):
height_and_width = int(image_size[0]), int(image_size[1])
elif image_size is None:
height_and_width = (None, None)
else:
raise ValueError("``image_size`` could be either int or Tuple[int, int]")

return height_and_width


def read_image(path: Union[str, Path], image_size: Optional[Union[int, Tuple]] = None) -> np.ndarray:
"""Read image from disk in RGB format.

Args:
Expand All @@ -157,6 +198,13 @@ def read_image(path: Union[str, Path]) -> np.ndarray:
image = cv2.imread(path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

if image_size:
# This part is optional, where the user wants to quickly resize the image
# with a one-liner code. This would particularly be useful especially when
# prototyping new ideas.
height, width = get_image_height_and_width(image_size)
image = cv2.resize(image, dsize=(width, height), interpolation=cv2.INTER_AREA)

return image


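As a quick illustration of the new optional resize path in read_image (the file path below is a hypothetical placeholder; any RGB image on disk would do):

from anomalib.data.utils import read_image

# Hypothetical example path, used here only for illustration.
image = read_image("path/to/some_image.png", image_size=256)
print(image.shape)  # expected: (256, 256, 3)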
7 changes: 4 additions & 3 deletions anomalib/post_processing/visualizer.py
@@ -13,12 +13,12 @@
import numpy as np
from skimage.segmentation import mark_boundaries

from anomalib.data.utils import read_image
from anomalib.post_processing.post_process import (
add_anomalous_label,
add_normal_label,
superimpose_anomaly_map,
)
from anomalib.pre_processing.transforms import Denormalize


@dataclass
@@ -73,9 +73,10 @@ def visualize_batch(self, batch: Dict) -> Iterator[np.ndarray]:
Returns:
Generator that yields a display-ready visualization for each image.
"""
for i in range(batch["image"].size(0)):
batch_size, _num_channels, height, width = batch["image"].size()
for i in range(batch_size):
image_result = ImageResult(
image=Denormalize()(batch["image"][i].cpu()),
image=read_image(path=batch["image_path"][i], image_size=(height, width)),
pred_score=batch["pred_scores"][i].cpu().numpy().item(),
pred_label=batch["pred_labels"][i].cpu().numpy().item(),
anomaly_map=batch["anomaly_maps"][i].cpu().numpy() if "anomaly_maps" in batch else None,
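The reasoning behind swapping Denormalize for read_image, sketched under the assumption that a custom config may normalize with non-ImageNet statistics: undoing the normalization with hard-coded ImageNet mean and std no longer recovers the original pixels, whereas re-reading the file from batch["image_path"] always does.

import numpy as np

# Hypothetical custom normalization statistics (not the ImageNet defaults).
custom_mean = np.array([0.5, 0.5, 0.5])
custom_std = np.array([0.25, 0.25, 0.25])
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])

original = np.random.rand(8, 8, 3)  # stand-in for an input image scaled to [0, 1]
normalized = (original - custom_mean) / custom_std

# Denormalizing with the ImageNet statistics does not recover the original image.
recovered = normalized * imagenet_std + imagenet_mean
print(np.allclose(recovered, original))  # False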
157 changes: 101 additions & 56 deletions anomalib/pre_processing/pre_process.py
@@ -7,11 +7,111 @@
# Copyright (C) 2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import logging
from typing import Optional, Tuple, Union

import albumentations as A
from albumentations.pytorch import ToTensorV2

from anomalib.data.utils import get_image_height_and_width

logger = logging.getLogger(__name__)


A review thread was attached to get_transforms:

Collaborator: Should we also move this function to data/utils?

Contributor Author: I think this is still one of the most important components of the pre-processing sub-package. If we move it to data/utils, then pre_processing would become quite a weak package that we could consider removing in the future.

Contributor: In that case we could ask ourselves if we really need the pre-processing module.

Contributor Author: Yes, that's the question, I think.

Contributor Author: I'm merging this one; we could discuss this later on.

def get_transforms(
    config: Optional[Union[str, A.Compose]] = None,
    image_size: Optional[Union[int, Tuple]] = None,
    to_tensor: bool = True,
) -> A.Compose:
"""Get transforms from config or image size.

Args:
config (Optional[Union[str, A.Compose]], optional): Albumentations transforms.
Either config or albumentations ``Compose`` object. Defaults to None.
image_size (Optional[Union[int, Tuple]], optional): Image size to transform. Defaults to None.
to_tensor (bool, optional): Boolean to convert the final transforms into Torch tensor. Defaults to True.

Raises:
ValueError: When both ``config`` and ``image_size`` is ``None``.
ValueError: When ``config`` is not a ``str`` or `A.Compose`` object.

Returns:
A.Compose: Albumentation ``Compose`` object containing the image transforms.

Examples:
>>> import skimage
>>> image = skimage.data.astronaut()

>>> transforms = get_transforms(image_size=256, to_tensor=False)
>>> output = transforms(image=image)
>>> output["image"].shape
(256, 256, 3)

>>> transforms = get_transforms(image_size=256, to_tensor=True)
>>> output = transforms(image=image)
>>> output["image"].shape
torch.Size([3, 256, 256])


Transforms could be read from albumentations Compose object.
>>> import albumentations as A
>>> from albumentations.pytorch import ToTensorV2
>>> config = A.Compose([A.Resize(512, 512), ToTensorV2()])
>>> transforms = get_transforms(config=config, to_tensor=False)
>>> output = transforms(image=image)
>>> output["image"].shape
(512, 512, 3)
>>> type(output["image"])
numpy.ndarray

Transforms could be deserialized from a yaml file.
>>> transforms = A.Compose([A.Resize(1024, 1024), ToTensorV2()])
>>> A.save(transforms, "/tmp/transforms.yaml", data_format="yaml")
>>> transforms = get_transforms(config="/tmp/transforms.yaml")
>>> output = transforms(image=image)
>>> output["image"].shape
torch.Size([3, 1024, 1024])
"""
    if config is None and image_size is None:
        raise ValueError(
            "Both config and image_size cannot be `None`. "
            "Provide either config file to de-serialize transforms "
            "or image_size to get the default transformations"
        )

    transforms: A.Compose

    if config is None and image_size is not None:
        logger.warning("Transform config has not been provided. Images will be normalized using ImageNet statistics.")

        height, width = get_image_height_and_width(image_size)
        transforms = A.Compose(
            [
                A.Resize(height=height, width=width, always_apply=True),
                A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                ToTensorV2(),
            ]
        )

    if config is not None:
        if isinstance(config, str):
            transforms = A.load(filepath=config, data_format="yaml")
        elif isinstance(config, A.Compose):
            transforms = config
        else:
            raise ValueError("config could be either ``str`` or ``A.Compose``")

    if not to_tensor:
        if isinstance(transforms[-1], ToTensorV2):
            transforms = A.Compose(transforms[:-1])

    # always resize to specified image size
    if not any(isinstance(transform, A.Resize) for transform in transforms) and image_size is not None:
        height, width = get_image_height_and_width(image_size)
        transforms = A.Compose([A.Resize(height=height, width=width, always_apply=True), transforms])

    return transforms
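One behavior of the body above that is easy to miss: when the supplied config contains no A.Resize but an image_size is also given, a resize is prepended to the pipeline. A minimal sketch, assuming get_transforms is imported from anomalib.pre_processing.pre_process and using arbitrary example transforms:

import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2

from anomalib.pre_processing.pre_process import get_transforms

# A config without an explicit resize step.
config = A.Compose([A.HorizontalFlip(p=0.0), ToTensorV2()])

transforms = get_transforms(config=config, image_size=(224, 224))
output = transforms(image=np.zeros((512, 512, 3), dtype=np.uint8))
print(output["image"].shape)  # expected: torch.Size([3, 224, 224])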


class PreProcessor:
"""Applies pre-processing and data augmentations to the input and returns the transformed output.
@@ -74,63 +74,8 @@ def __init__(
self.image_size = image_size
self.to_tensor = to_tensor

self.transforms = self.get_transforms()

def get_transforms(self) -> A.Compose:
"""Get transforms from config or image size.

Returns:
A.Compose: List of albumentation transformations to apply to the
input image.
"""
if self.config is None and self.image_size is None:
raise ValueError(
"Both config and image_size cannot be `None`. "
"Provide either config file to de-serialize transforms "
"or image_size to get the default transformations"
)

transforms: A.Compose

if self.config is None and self.image_size is not None:
height, width = self._get_height_and_width()
transforms = A.Compose(
[
A.Resize(height=height, width=width, always_apply=True),
A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
ToTensorV2(),
]
)

if self.config is not None:
if isinstance(self.config, str):
transforms = A.load(filepath=self.config, data_format="yaml")
elif isinstance(self.config, A.Compose):
transforms = self.config
else:
raise ValueError("config could be either ``str`` or ``A.Compose``")

if not self.to_tensor:
if isinstance(transforms[-1], ToTensorV2):
transforms = A.Compose(transforms[:-1])

# always resize to specified image size
if not any(isinstance(transform, A.Resize) for transform in transforms) and self.image_size is not None:
height, width = self._get_height_and_width()
transforms = A.Compose([A.Resize(height=height, width=width, always_apply=True), transforms])

return transforms
self.transforms = get_transforms(config, image_size, to_tensor)

def __call__(self, *args, **kwargs):
"""Return transformed arguments."""
return self.transforms(*args, **kwargs)

def _get_height_and_width(self) -> Tuple[Optional[int], Optional[int]]:
"""Extract height and width from image size attribute."""
if isinstance(self.image_size, int):
return self.image_size, self.image_size
if isinstance(self.image_size, tuple):
return int(self.image_size[0]), int(self.image_size[1])
if self.image_size is None:
return None, None
raise ValueError("``image_size`` could be either int or Tuple[int, int]")
(changes to an additional test file; file name not shown in this view)
@@ -11,6 +11,7 @@
from anomalib.models.components import AnomalyModule
from anomalib.utils.callbacks import ImageVisualizerCallback
from anomalib.utils.metrics import get_metrics
from tests.helpers.dataset import get_dataset_path


class DummyDataset(Dataset):
@@ -68,7 +69,7 @@ def test_step(self, batch, _):
"""Only used to trigger on_test_epoch_end."""
self.log(name="loss", value=0.0, prog_bar=True)
outputs = dict(
image_path=[Path("test1.jpg")],
image_path=[Path(get_dataset_path("bottle")) / "broken_large/000.png"],
image=torch.rand((1, 3, 100, 100)),
mask=torch.zeros((1, 100, 100)),
anomaly_maps=torch.ones((1, 100, 100)),
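The dummy test model now has to point at a real image on disk because the refactored visualizer re-reads batch["image_path"] instead of denormalizing the tensor. A rough sketch of that dependency, assuming the dataset returned by get_dataset_path is available locally:

from pathlib import Path

from anomalib.data.utils import read_image
from tests.helpers.dataset import get_dataset_path

# The visualizer calls read_image on this path, so it must exist on disk.
image_path = Path(get_dataset_path("bottle")) / "broken_large/000.png"
image = read_image(path=str(image_path), image_size=(100, 100))
print(image.shape)  # expected: (100, 100, 3)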
(changes to an additional test file; file name not shown in this view)
@@ -1,7 +1,7 @@
import glob
import os
import tempfile
from unittest import mock
from pathlib import Path

import pytest
import pytorch_lightning as pl
@@ -42,7 +42,7 @@ def test_add_images(dataset):
)
trainer.test(model=model, datamodule=DummyDataModule())
# test if images are logged
if len(glob.glob(os.path.join(dir_loc, "images", "*.jpg"))) != 1:
if len(list(Path(dir_loc).glob("**/*.png"))) != 1:
raise Exception("Failed to save to local path")

# test if tensorboard logs are created