Skip to content

Commit

Permalink
assign test split for folder dataset (#220)
Browse files Browse the repository at this point in the history
* define test split for folder

* assign normal_test

* dir names fixed

* fix test
  • Loading branch information
alexriedel1 authored Apr 12, 2022
1 parent 487ff45 commit 2da55ae
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 35 deletions.
5 changes: 3 additions & 2 deletions anomalib/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,10 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> LightningDataModule
elif config.dataset.format.lower() == "folder":
datamodule = FolderDataModule(
root=config.dataset.path,
normal=config.dataset.normal,
abnormal=config.dataset.abnormal,
normal_dir=config.dataset.normal_dir,
abnormal_dir=config.dataset.abnormal_dir,
task=config.dataset.task,
normal_test_dir=config.dataset.normal_test_dir,
mask_dir=config.dataset.mask,
extensions=config.dataset.extensions,
split_ratio=config.dataset.split_ratio,
Expand Down
97 changes: 66 additions & 31 deletions anomalib/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,37 @@ def _check_and_convert_path(path: Union[str, Path]) -> Path:
return path


def _prepare_files_labels(
    path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None
) -> Tuple[list, list]:
    """Return a list of filenames and a list of corresponding labels.

    Args:
        path (Union[str, Path]): Path to the directory containing images.
        path_type (str): Type of images in the provided path ("normal", "abnormal", "normal_test")
        extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the
            directory. Falls back to ``IMG_EXTENSIONS`` when ``None``.

    Raises:
        RuntimeError: If no file with a matching extension is found under ``path``.

    Returns:
        List, List: Filenames of the images provided in the paths, labels of the images provided in the paths
    """
    path = _check_and_convert_path(path)
    if extensions is None:
        extensions = IMG_EXTENSIONS

    # Search sub-directories recursively. The `is_file` check keeps directories whose
    # name happens to end in an image extension (e.g. "scans.png/") out of the sample
    # list; the cheap suffix test runs first so only candidate paths hit the filesystem.
    filenames = [f for f in path.glob(r"**/*") if f.suffix in extensions and f.is_file()]
    if not filenames:
        raise RuntimeError(f"Found 0 {path_type} images in {path}")

    # Every file found under `path` gets the same label, namely the directory type.
    labels = [path_type] * len(filenames)

    return filenames, labels


def make_dataset(
normal_dir: Union[str, Path],
abnormal_dir: Union[str, Path],
normal_test_dir: Optional[Union[str, Path]] = None,
mask_dir: Optional[Union[str, Path]] = None,
split: Optional[str] = None,
split_ratio: float = 0.2,
Expand All @@ -72,6 +100,9 @@ def make_dataset(
Args:
normal_dir (Union[str, Path]): Path to the directory containing normal images.
abnormal_dir (Union[str, Path]): Path to the directory containing abnormal images.
normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Normal test images will be a split of `normal_dir`
if `None`. Defaults to None.
mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
the mask annotations. Defaults to None.
split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None.
Expand All @@ -87,40 +118,31 @@ def make_dataset(
Returns:
DataFrame: an output dataframe containing samples for the requested split (ie., train or test)
"""
normal_dir = _check_and_convert_path(normal_dir)
abnormal_dir = _check_and_convert_path(abnormal_dir)

if extensions is None:
extensions = IMG_EXTENSIONS

# Get filenames from normal and abnormal directory.
normal_filenames = [f for f in normal_dir.glob(r"**/*") if f.suffix in extensions]
abnormal_filenames = [f for f in abnormal_dir.glob(r"**/*") if f.suffix in extensions]
filenames = normal_filenames + abnormal_filenames

if len(normal_filenames) == 0:
raise RuntimeError(f"Found 0 normal images in {normal_dir}")
filenames = []
labels = []
dirs = {"normal": normal_dir, "abnormal": abnormal_dir}

if len(abnormal_filenames) == 0:
raise RuntimeError(f"Found 0 annormal images in {abnormal_dir}")
if normal_test_dir:
dirs = {**dirs, **{"normal_test": normal_test_dir}}

# Add normal and abnormal labels to the samples as `label` column.
normal_labels = ["normal"] * len(normal_filenames)
abnormal_labels = ["abnormal"] * len(abnormal_filenames)
labels = normal_labels + abnormal_labels
for dir_type, path in dirs.items():
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label

samples = DataFrame({"image_path": filenames, "label": labels})

# Create label index for normal (0) and abnormal (1) images.
samples.loc[(samples.label == "normal"), "label_index"] = 0
samples.loc[(samples.label == "normal") | (samples.label == "normal_test"), "label_index"] = 0
samples.loc[(samples.label == "abnormal"), "label_index"] = 1
samples.label_index = samples.label_index.astype(int)

# If a path to mask is provided, add it to the sample dataframe.
if mask_dir is not None:
mask_dir = _check_and_convert_path(mask_dir)
normal_gt = ["" for f in normal_filenames]
abnormal_gt = [str(mask_dir / f.name) for f in abnormal_filenames]
normal_gt = ["" for f in samples.loc[samples.label_index == 0]["image_path"]]
abnormal_gt = [str(mask_dir / f.name) for f in samples.loc[samples.label_index == 1]["image_path"]]
gt_filenames = normal_gt + abnormal_gt

samples["mask_path"] = gt_filenames
Expand All @@ -133,10 +155,12 @@ def make_dataset(
# By default, all the normal samples are assigned to the train split,
# and all the abnormal samples are assigned to the test split.
samples.loc[(samples.label == "normal"), "split"] = "train"
samples.loc[(samples.label == "abnormal"), "split"] = "test"
samples = split_normal_images_in_train_set(
samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal"
)
samples.loc[(samples.label == "abnormal") | (samples.label == "normal_test"), "split"] = "test"

if not normal_test_dir:
samples = split_normal_images_in_train_set(
samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal"
)

# If `create_validation_set` is set to True, the test set is split into half.
if create_validation_set:
Expand All @@ -159,6 +183,7 @@ def __init__(
abnormal_dir: Union[Path, str],
split: str,
pre_process: PreProcessor,
normal_test_dir: Optional[Union[Path, str]] = None,
split_ratio: float = 0.2,
mask_dir: Optional[Union[Path, str]] = None,
extensions: Optional[Tuple[str, ...]] = None,
Expand All @@ -174,6 +199,8 @@ def __init__(
split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None.
pre_process (Optional[PreProcessor], optional): Image Pre-processor to apply transform.
Defaults to None.
normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
split_ratio (float, optional): Ratio to split normal training images and add to the
test set in case test set doesn't contain any normal images.
Defaults to 0.2.
Expand Down Expand Up @@ -207,6 +234,7 @@ def __init__(
self.samples = make_dataset(
normal_dir=normal_dir,
abnormal_dir=abnormal_dir,
normal_test_dir=normal_test_dir,
mask_dir=mask_dir,
split=split,
split_ratio=split_ratio,
Expand Down Expand Up @@ -268,9 +296,10 @@ class FolderDataModule(LightningDataModule):
def __init__(
self,
root: Union[str, Path],
normal: str = "normal",
abnormal: str = "abnormal",
normal_dir: str = "normal",
abnormal_dir: str = "abnormal",
task: str = "classification",
normal_test_dir: Optional[Union[Path, str]] = None,
mask_dir: Optional[Union[Path, str]] = None,
extensions: Optional[Tuple[str, ...]] = None,
split_ratio: float = 0.2,
Expand All @@ -287,12 +316,14 @@ def __init__(
Args:
root (Union[str, Path]): Path to the root folder containing normal and abnormal dirs.
normal (str, optional): Name of the directory containing normal images.
normal_dir (str, optional): Name of the directory containing normal images.
Defaults to "normal".
abnormal (str, optional): Name of the directory containing abnormal images.
abnormal_dir (str, optional): Name of the directory containing abnormal images.
Defaults to "abnormal".
task (str, optional): Task type. Could be either classification or segmentation.
Defaults to "classification".
normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
the mask annotations. Defaults to None.
extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the
Expand Down Expand Up @@ -382,8 +413,11 @@ def __init__(
super().__init__()

self.root = _check_and_convert_path(root)
self.normal_dir = self.root / normal
self.abnormal_dir = self.root / abnormal
self.normal_dir = self.root / normal_dir
self.abnormal_dir = self.root / abnormal_dir
self.normal_test = normal_test_dir
if normal_test_dir:
self.normal_test = self.root / normal_test_dir
self.mask_dir = mask_dir
self.extensions = extensions
self.split_ratio = split_ratio
Expand Down Expand Up @@ -457,6 +491,7 @@ def setup(self, stage: Optional[str] = None) -> None:
normal_dir=self.normal_dir,
abnormal_dir=self.abnormal_dir,
split="test",
normal_test_dir=self.normal_test,
split_ratio=self.split_ratio,
mask_dir=self.mask_dir,
pre_process=self.pre_process_val,
Expand Down
4 changes: 2 additions & 2 deletions tests/pre_merge/datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def folder_data_module():
root = get_dataset_path(dataset="bottle")
datamodule = FolderDataModule(
root=root,
normal="good",
abnormal="broken_large",
normal_dir="good",
abnormal_dir="broken_large",
mask_dir=os.path.join(root, "ground_truth/broken_large"),
task="segmentation",
split_ratio=0.2,
Expand Down

0 comments on commit 2da55ae

Please sign in to comment.