From 6ace75d464440d9c5d60b2fa3bbf60c9b1e45916 Mon Sep 17 00:00:00 2001 From: Mrinal Jain Date: Mon, 8 Nov 2021 01:35:58 -0500 Subject: [PATCH 1/4] Making `ar_thr` available as a hyperparameter --- data/hyps/hyp.finetune.yaml | 1 + data/hyps/hyp.finetune_objects365.yaml | 1 + data/hyps/hyp.scratch-high.yaml | 1 + data/hyps/hyp.scratch-low.yaml | 1 + data/hyps/hyp.scratch-med.yaml | 1 + data/hyps/hyp.scratch.yaml | 1 + utils/augmentations.py | 6 ++++-- utils/datasets.py | 9 ++++++--- 8 files changed, 16 insertions(+), 5 deletions(-) diff --git a/data/hyps/hyp.finetune.yaml b/data/hyps/hyp.finetune.yaml index b89d66ff8dee..3429e94c7da8 100644 --- a/data/hyps/hyp.finetune.yaml +++ b/data/hyps/hyp.finetune.yaml @@ -37,3 +37,4 @@ fliplr: 0.5 mosaic: 1.0 mixup: 0.243 copy_paste: 0.0 +ar_thr: 20 diff --git a/data/hyps/hyp.finetune_objects365.yaml b/data/hyps/hyp.finetune_objects365.yaml index 073720a65be5..6f2d5fb767f2 100644 --- a/data/hyps/hyp.finetune_objects365.yaml +++ b/data/hyps/hyp.finetune_objects365.yaml @@ -29,3 +29,4 @@ fliplr: 0.5 mosaic: 1.0 mixup: 0.0 copy_paste: 0.0 +ar_thr: 20 diff --git a/data/hyps/hyp.scratch-high.yaml b/data/hyps/hyp.scratch-high.yaml index 5a586cc63fae..2930ee584505 100644 --- a/data/hyps/hyp.scratch-high.yaml +++ b/data/hyps/hyp.scratch-high.yaml @@ -32,3 +32,4 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.1 # image mixup (probability) copy_paste: 0.1 # segment copy-paste (probability) +ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/data/hyps/hyp.scratch-low.yaml b/data/hyps/hyp.scratch-low.yaml index b9ef1d55a3b6..000a57c67d14 100644 --- a/data/hyps/hyp.scratch-low.yaml +++ b/data/hyps/hyp.scratch-low.yaml @@ -32,3 +32,4 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.0 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) +ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/data/hyps/hyp.scratch-med.yaml b/data/hyps/hyp.scratch-med.yaml index d6867d7557ba..5ef211abe299 100644 --- a/data/hyps/hyp.scratch-med.yaml +++ b/data/hyps/hyp.scratch-med.yaml @@ -32,3 +32,4 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.1 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) +ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/data/hyps/hyp.scratch.yaml b/data/hyps/hyp.scratch.yaml index 31f6d142e285..c195f8849c9a 100644 --- a/data/hyps/hyp.scratch.yaml +++ b/data/hyps/hyp.scratch.yaml @@ -32,3 +32,4 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.0 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) +ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/utils/augmentations.py b/utils/augmentations.py index 1c3e66fb87ab..6e7dbec033cd 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -123,7 +123,7 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, - border=(0, 0)): + border=(0, 0), ar_thr=20): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] @@ -204,7 +204,9 @@ def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, sc new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates - i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + i = box_candidates( + box1=targets[:, 1:5].T * s, box2=new.T, ar_thr=ar_thr, area_thr=0.01 if use_segments else 0.10 + ) targets = targets[i] targets[:, 1:5] = new[i] diff --git a/utils/datasets.py b/utils/datasets.py index 15fca1775849..25ff50be9c3c 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -582,7 +582,8 @@ def __getitem__(self, index): translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear'], - perspective=hyp['perspective']) + perspective=hyp['perspective'], + ar_thr=hyp['ar_thr']) nl = len(labels) # number of labels if nl: @@ -729,7 +730,8 @@ def load_mosaic(self, index): scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove + border=self.mosaic_border, # border to remove + ar_thr=self.hyp['ar_thr']) return img4, labels4 @@ -803,7 +805,8 @@ def load_mosaic9(self, index): scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove + border=self.mosaic_border, # border to remove + ar_thr=self.hyp['ar_thr']) return img9, labels9 From c4395492e1cacc6210c4c0d4c4bcf66aef662443 Mon Sep 17 00:00:00 2001 From: Mrinal Jain Date: Tue, 9 Nov 2021 14:14:07 -0500 Subject: [PATCH 2/4] Disabling ar_thr as hyperparameter and computing from the dataset instead --- data/hyps/hyp.finetune.yaml | 1 - data/hyps/hyp.finetune_objects365.yaml | 1 - data/hyps/hyp.scratch-high.yaml | 1 - data/hyps/hyp.scratch-low.yaml | 1 - data/hyps/hyp.scratch-med.yaml | 1 - data/hyps/hyp.scratch.yaml | 1 - utils/datasets.py | 12 +++++++++--- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/data/hyps/hyp.finetune.yaml b/data/hyps/hyp.finetune.yaml index 3429e94c7da8..b89d66ff8dee 100644 --- a/data/hyps/hyp.finetune.yaml +++ b/data/hyps/hyp.finetune.yaml @@ -37,4 +37,3 @@ fliplr: 0.5 mosaic: 1.0 mixup: 0.243 copy_paste: 0.0 -ar_thr: 20 diff --git a/data/hyps/hyp.finetune_objects365.yaml b/data/hyps/hyp.finetune_objects365.yaml index 6f2d5fb767f2..073720a65be5 100644 --- a/data/hyps/hyp.finetune_objects365.yaml +++ b/data/hyps/hyp.finetune_objects365.yaml @@ -29,4 +29,3 @@ fliplr: 0.5 mosaic: 1.0 mixup: 0.0 copy_paste: 0.0 -ar_thr: 20 diff --git a/data/hyps/hyp.scratch-high.yaml b/data/hyps/hyp.scratch-high.yaml index 2930ee584505..5a586cc63fae 100644 --- a/data/hyps/hyp.scratch-high.yaml +++ b/data/hyps/hyp.scratch-high.yaml @@ -32,4 +32,3 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.1 # image mixup (probability) copy_paste: 0.1 # segment copy-paste (probability) -ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/data/hyps/hyp.scratch-low.yaml b/data/hyps/hyp.scratch-low.yaml index 000a57c67d14..b9ef1d55a3b6 100644 --- a/data/hyps/hyp.scratch-low.yaml +++ b/data/hyps/hyp.scratch-low.yaml @@ -32,4 +32,3 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.0 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) -ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/data/hyps/hyp.scratch-med.yaml b/data/hyps/hyp.scratch-med.yaml index 5ef211abe299..d6867d7557ba 100644 --- a/data/hyps/hyp.scratch-med.yaml +++ b/data/hyps/hyp.scratch-med.yaml @@ -32,4 +32,3 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.1 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) -ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/data/hyps/hyp.scratch.yaml b/data/hyps/hyp.scratch.yaml index c195f8849c9a..31f6d142e285 100644 --- a/data/hyps/hyp.scratch.yaml +++ b/data/hyps/hyp.scratch.yaml @@ -32,4 +32,3 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.0 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) -ar_thr: 20 # aspect ratio threshold to filter out candidate bounding boxes diff --git a/utils/datasets.py b/utils/datasets.py index 76995cfe57c0..4540599f2f6a 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -388,6 +388,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.stride = stride self.path = path self.albumentations = Albumentations() if augment else None + self.ar_thr = 20 try: f = [] # image files @@ -457,6 +458,11 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r if segment: self.segments[i][:, 0] = 0 + # Dataset-specific update of ar_thr to prevent filtering of correct labels + concatenated_labels = np.concatenate(self.labels) # cxywh + dataset_max_ar = round((concatenated_labels[:, 3] / concatenated_labels[:, 4]).max()) + self.ar_thr = max(dataset_max_ar, self.ar_thr) + # Rectangular Training if self.rect: # Sort by aspect ratio @@ -582,7 +588,7 @@ def __getitem__(self, index): scale=hyp['scale'], shear=hyp['shear'], perspective=hyp['perspective'], - ar_thr=hyp['ar_thr']) + ar_thr=self.ar_thr) nl = len(labels) # number of labels if nl: @@ -730,7 +736,7 @@ def load_mosaic(self, index): shear=self.hyp['shear'], perspective=self.hyp['perspective'], border=self.mosaic_border, # border to remove - ar_thr=self.hyp['ar_thr']) + ar_thr=self.ar_thr) return img4, labels4 @@ -805,7 +811,7 @@ def load_mosaic9(self, index): shear=self.hyp['shear'], perspective=self.hyp['perspective'], border=self.mosaic_border, # border to remove - ar_thr=self.hyp['ar_thr']) + ar_thr=self.ar_thr) return img9, labels9 From 254e2fb4afc4502a03d22e0a7bd1e3e2fe73bf39 Mon Sep 17 00:00:00 2001 From: Mrinal Jain Date: Sun, 14 Nov 2021 18:07:29 -0500 Subject: [PATCH 3/4] Fixing bug in ar_thr computation --- utils/datasets.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/utils/datasets.py b/utils/datasets.py index 6de2117353fb..489e53eec9f4 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -461,8 +461,13 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.segments[i][:, 0] = 0 # Dataset-specific update of ar_thr to prevent filtering of correct labels + labels_per_example = np.array(list(map(len, self.labels))) + broadcasted_shapes = np.repeat(self.shapes, repeats=labels_per_example, axis=0) # wh concatenated_labels = np.concatenate(self.labels) # cxywh - dataset_max_ar = round((concatenated_labels[:, 3] / concatenated_labels[:, 4]).max()) + + unnormalized_widths = concatenated_labels[:, 3] * broadcasted_shapes[:, 0] + unnormalized_heights = concatenated_labels[:, 4] * broadcasted_shapes[:, 1] + dataset_max_ar = round((unnormalized_widths / unnormalized_heights).max()) self.ar_thr = max(dataset_max_ar, self.ar_thr) # Rectangular Training From 12993afa6fa5e3d912082b19cccb76198ea166b0 Mon Sep 17 00:00:00 2001 From: Mrinal Jain Date: Tue, 14 Dec 2021 22:33:12 +0530 Subject: [PATCH 4/4] Fix `ar_thr` to 100 --- utils/augmentations.py | 8 +++----- utils/datasets.py | 20 +++----------------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/utils/augmentations.py b/utils/augmentations.py index e59f2cc95064..0311b97b63db 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -122,7 +122,7 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, - border=(0, 0), ar_thr=20): + border=(0, 0)): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] @@ -203,9 +203,7 @@ def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, sc new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates - i = box_candidates( - box1=targets[:, 1:5].T * s, box2=new.T, ar_thr=ar_thr, area_thr=0.01 if use_segments else 0.10 - ) + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) targets = targets[i] targets[:, 1:5] = new[i] @@ -271,7 +269,7 @@ def mixup(im, labels, im2, labels2): return im, labels -def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) +def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] diff --git a/utils/datasets.py b/utils/datasets.py index 3575e2a3b9db..79b871c9294b 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -389,7 +389,6 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.stride = stride self.path = path self.albumentations = Albumentations() if augment else None - self.ar_thr = 20 try: f = [] # image files @@ -459,16 +458,6 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r if segment: self.segments[i][:, 0] = 0 - # Dataset-specific update of ar_thr to prevent filtering of correct labels - labels_per_example = np.array(list(map(len, self.labels))) - broadcasted_shapes = np.repeat(self.shapes, repeats=labels_per_example, axis=0) # wh - concatenated_labels = np.concatenate(self.labels) # cxywh - - unnormalized_widths = concatenated_labels[:, 3] * broadcasted_shapes[:, 0] - unnormalized_heights = concatenated_labels[:, 4] * broadcasted_shapes[:, 1] - dataset_max_ar = round((unnormalized_widths / unnormalized_heights).max()) - self.ar_thr = max(dataset_max_ar, self.ar_thr) - # Rectangular Training if self.rect: # Sort by aspect ratio @@ -593,8 +582,7 @@ def __getitem__(self, index): translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear'], - perspective=hyp['perspective'], - ar_thr=self.ar_thr) + perspective=hyp['perspective']) nl = len(labels) # number of labels if nl: @@ -742,8 +730,7 @@ def load_mosaic(self, index): scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], - border=self.mosaic_border, # border to remove - ar_thr=self.ar_thr) + border=self.mosaic_border) # border to remove return img4, labels4 @@ -817,8 +804,7 @@ def load_mosaic9(self, index): scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], - border=self.mosaic_border, # border to remove - ar_thr=self.ar_thr) + border=self.mosaic_border) # border to remove return img9, labels9