From 6ace75d464440d9c5d60b2fa3bbf60c9b1e45916 Mon Sep 17 00:00:00 2001
From: Mrinal Jain <mrinaljain007@gmail.com>
Date: Mon, 8 Nov 2021 01:35:58 -0500
Subject: [PATCH 1/4] Making `ar_thr` available as a hyperparameter

---
 data/hyps/hyp.finetune.yaml            | 1 +
 data/hyps/hyp.finetune_objects365.yaml | 1 +
 data/hyps/hyp.scratch-high.yaml        | 1 +
 data/hyps/hyp.scratch-low.yaml         | 1 +
 data/hyps/hyp.scratch-med.yaml         | 1 +
 data/hyps/hyp.scratch.yaml             | 1 +
 utils/augmentations.py                 | 6 ++++--
 utils/datasets.py                      | 9 ++++++---
 8 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/data/hyps/hyp.finetune.yaml b/data/hyps/hyp.finetune.yaml
index b89d66ff8dee..3429e94c7da8 100644
--- a/data/hyps/hyp.finetune.yaml
+++ b/data/hyps/hyp.finetune.yaml
@@ -37,3 +37,4 @@ fliplr: 0.5
 mosaic: 1.0
 mixup: 0.243
 copy_paste: 0.0
+ar_thr: 20
diff --git a/data/hyps/hyp.finetune_objects365.yaml b/data/hyps/hyp.finetune_objects365.yaml
index 073720a65be5..6f2d5fb767f2 100644
--- a/data/hyps/hyp.finetune_objects365.yaml
+++ b/data/hyps/hyp.finetune_objects365.yaml
@@ -29,3 +29,4 @@ fliplr: 0.5
 mosaic: 1.0
 mixup: 0.0
 copy_paste: 0.0
+ar_thr: 20
diff --git a/data/hyps/hyp.scratch-high.yaml b/data/hyps/hyp.scratch-high.yaml
index 5a586cc63fae..2930ee584505 100644
--- a/data/hyps/hyp.scratch-high.yaml
+++ b/data/hyps/hyp.scratch-high.yaml
@@ -32,3 +32,4 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.1  # image mixup (probability)
 copy_paste: 0.1  # segment copy-paste (probability)
+ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/data/hyps/hyp.scratch-low.yaml b/data/hyps/hyp.scratch-low.yaml
index b9ef1d55a3b6..000a57c67d14 100644
--- a/data/hyps/hyp.scratch-low.yaml
+++ b/data/hyps/hyp.scratch-low.yaml
@@ -32,3 +32,4 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
 copy_paste: 0.0  # segment copy-paste (probability)
+ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/data/hyps/hyp.scratch-med.yaml b/data/hyps/hyp.scratch-med.yaml
index d6867d7557ba..5ef211abe299 100644
--- a/data/hyps/hyp.scratch-med.yaml
+++ b/data/hyps/hyp.scratch-med.yaml
@@ -32,3 +32,4 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.1  # image mixup (probability)
 copy_paste: 0.0  # segment copy-paste (probability)
+ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/data/hyps/hyp.scratch.yaml b/data/hyps/hyp.scratch.yaml
index 31f6d142e285..c195f8849c9a 100644
--- a/data/hyps/hyp.scratch.yaml
+++ b/data/hyps/hyp.scratch.yaml
@@ -32,3 +32,4 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
 copy_paste: 0.0  # segment copy-paste (probability)
+ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/utils/augmentations.py b/utils/augmentations.py
index 1c3e66fb87ab..6e7dbec033cd 100644
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@@ -123,7 +123,7 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF
 
 
 def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
-                       border=(0, 0)):
+                       border=(0, 0), ar_thr=20):
     # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
     # targets = [cls, xyxy]
 
@@ -204,7 +204,9 @@ def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, sc
             new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
 
         # filter candidates
-        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
+        i = box_candidates(
+            box1=targets[:, 1:5].T * s, box2=new.T, ar_thr=ar_thr, area_thr=0.01 if use_segments else 0.10
+        )
         targets = targets[i]
         targets[:, 1:5] = new[i]
 
diff --git a/utils/datasets.py b/utils/datasets.py
index 15fca1775849..25ff50be9c3c 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -582,7 +582,8 @@ def __getitem__(self, index):
                                                  translate=hyp['translate'],
                                                  scale=hyp['scale'],
                                                  shear=hyp['shear'],
-                                                 perspective=hyp['perspective'])
+                                                 perspective=hyp['perspective'],
+                                                 ar_thr=hyp['ar_thr'])
 
         nl = len(labels)  # number of labels
         if nl:
@@ -729,7 +730,8 @@ def load_mosaic(self, index):
                                        scale=self.hyp['scale'],
                                        shear=self.hyp['shear'],
                                        perspective=self.hyp['perspective'],
-                                       border=self.mosaic_border)  # border to remove
+                                       border=self.mosaic_border,  # border to remove
+                                       ar_thr=self.hyp['ar_thr'])
 
     return img4, labels4
 
@@ -803,7 +805,8 @@ def load_mosaic9(self, index):
                                        scale=self.hyp['scale'],
                                        shear=self.hyp['shear'],
                                        perspective=self.hyp['perspective'],
-                                       border=self.mosaic_border)  # border to remove
+                                       border=self.mosaic_border,  # border to remove
+                                       ar_thr=self.hyp['ar_thr'])
 
     return img9, labels9
 

From c4395492e1cacc6210c4c0d4c4bcf66aef662443 Mon Sep 17 00:00:00 2001
From: Mrinal Jain <mrinaljain007@gmail.com>
Date: Tue, 9 Nov 2021 14:14:07 -0500
Subject: [PATCH 2/4] Disabling ar_thr as hyperparameter and computing from the
 dataset instead

---
 data/hyps/hyp.finetune.yaml            |  1 -
 data/hyps/hyp.finetune_objects365.yaml |  1 -
 data/hyps/hyp.scratch-high.yaml        |  1 -
 data/hyps/hyp.scratch-low.yaml         |  1 -
 data/hyps/hyp.scratch-med.yaml         |  1 -
 data/hyps/hyp.scratch.yaml             |  1 -
 utils/datasets.py                      | 12 +++++++++---
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/data/hyps/hyp.finetune.yaml b/data/hyps/hyp.finetune.yaml
index 3429e94c7da8..b89d66ff8dee 100644
--- a/data/hyps/hyp.finetune.yaml
+++ b/data/hyps/hyp.finetune.yaml
@@ -37,4 +37,3 @@ fliplr: 0.5
 mosaic: 1.0
 mixup: 0.243
 copy_paste: 0.0
-ar_thr: 20
diff --git a/data/hyps/hyp.finetune_objects365.yaml b/data/hyps/hyp.finetune_objects365.yaml
index 6f2d5fb767f2..073720a65be5 100644
--- a/data/hyps/hyp.finetune_objects365.yaml
+++ b/data/hyps/hyp.finetune_objects365.yaml
@@ -29,4 +29,3 @@ fliplr: 0.5
 mosaic: 1.0
 mixup: 0.0
 copy_paste: 0.0
-ar_thr: 20
diff --git a/data/hyps/hyp.scratch-high.yaml b/data/hyps/hyp.scratch-high.yaml
index 2930ee584505..5a586cc63fae 100644
--- a/data/hyps/hyp.scratch-high.yaml
+++ b/data/hyps/hyp.scratch-high.yaml
@@ -32,4 +32,3 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.1  # image mixup (probability)
 copy_paste: 0.1  # segment copy-paste (probability)
-ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/data/hyps/hyp.scratch-low.yaml b/data/hyps/hyp.scratch-low.yaml
index 000a57c67d14..b9ef1d55a3b6 100644
--- a/data/hyps/hyp.scratch-low.yaml
+++ b/data/hyps/hyp.scratch-low.yaml
@@ -32,4 +32,3 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
 copy_paste: 0.0  # segment copy-paste (probability)
-ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/data/hyps/hyp.scratch-med.yaml b/data/hyps/hyp.scratch-med.yaml
index 5ef211abe299..d6867d7557ba 100644
--- a/data/hyps/hyp.scratch-med.yaml
+++ b/data/hyps/hyp.scratch-med.yaml
@@ -32,4 +32,3 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.1  # image mixup (probability)
 copy_paste: 0.0  # segment copy-paste (probability)
-ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/data/hyps/hyp.scratch.yaml b/data/hyps/hyp.scratch.yaml
index c195f8849c9a..31f6d142e285 100644
--- a/data/hyps/hyp.scratch.yaml
+++ b/data/hyps/hyp.scratch.yaml
@@ -32,4 +32,3 @@ fliplr: 0.5  # image flip left-right (probability)
 mosaic: 1.0  # image mosaic (probability)
 mixup: 0.0  # image mixup (probability)
 copy_paste: 0.0  # segment copy-paste (probability)
-ar_thr: 20  # aspect ratio threshold to filter out candidate bounding boxes
diff --git a/utils/datasets.py b/utils/datasets.py
index 76995cfe57c0..4540599f2f6a 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -388,6 +388,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         self.stride = stride
         self.path = path
         self.albumentations = Albumentations() if augment else None
+        self.ar_thr = 20
 
         try:
             f = []  # image files
@@ -457,6 +458,11 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
                 if segment:
                     self.segments[i][:, 0] = 0
 
+        # Dataset-specific update of ar_thr to prevent filtering of correct labels
+        concatenated_labels = np.concatenate(self.labels)  # cxywh
+        dataset_max_ar = round((concatenated_labels[:, 3] / concatenated_labels[:, 4]).max())
+        self.ar_thr = max(dataset_max_ar, self.ar_thr)
+
         # Rectangular Training
         if self.rect:
             # Sort by aspect ratio
@@ -582,7 +588,7 @@ def __getitem__(self, index):
                                                  scale=hyp['scale'],
                                                  shear=hyp['shear'],
                                                  perspective=hyp['perspective'],
-                                                 ar_thr=hyp['ar_thr'])
+                                                 ar_thr=self.ar_thr)
 
         nl = len(labels)  # number of labels
         if nl:
@@ -730,7 +736,7 @@ def load_mosaic(self, index):
                                        shear=self.hyp['shear'],
                                        perspective=self.hyp['perspective'],
                                        border=self.mosaic_border,  # border to remove
-                                       ar_thr=self.hyp['ar_thr'])
+                                       ar_thr=self.ar_thr)
 
     return img4, labels4
 
@@ -805,7 +811,7 @@ def load_mosaic9(self, index):
                                        shear=self.hyp['shear'],
                                        perspective=self.hyp['perspective'],
                                        border=self.mosaic_border,  # border to remove
-                                       ar_thr=self.hyp['ar_thr'])
+                                       ar_thr=self.ar_thr)
 
     return img9, labels9
 

From 254e2fb4afc4502a03d22e0a7bd1e3e2fe73bf39 Mon Sep 17 00:00:00 2001
From: Mrinal Jain <mrinaljain007@gmail.com>
Date: Sun, 14 Nov 2021 18:07:29 -0500
Subject: [PATCH 3/4] Fixing ar_thr computation to use unnormalized box sizes

---
 utils/datasets.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index 6de2117353fb..489e53eec9f4 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -461,8 +461,13 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
                     self.segments[i][:, 0] = 0
 
         # Dataset-specific update of ar_thr to prevent filtering of correct labels
+        labels_per_example = np.array(list(map(len, self.labels)))
+        broadcasted_shapes = np.repeat(self.shapes, repeats=labels_per_example, axis=0)  # wh
         concatenated_labels = np.concatenate(self.labels)  # cxywh
-        dataset_max_ar = round((concatenated_labels[:, 3] / concatenated_labels[:, 4]).max())
+
+        unnormalized_widths = concatenated_labels[:, 3] * broadcasted_shapes[:, 0]
+        unnormalized_heights = concatenated_labels[:, 4] * broadcasted_shapes[:, 1]
+        dataset_max_ar = round((unnormalized_widths / unnormalized_heights).max())
         self.ar_thr = max(dataset_max_ar, self.ar_thr)
 
         # Rectangular Training

From 12993afa6fa5e3d912082b19cccb76198ea166b0 Mon Sep 17 00:00:00 2001
From: Mrinal Jain <mrinaljain007@gmail.com>
Date: Tue, 14 Dec 2021 22:33:12 +0530
Subject: [PATCH 4/4] Hard-code default `ar_thr` to 100 in `box_candidates`

---
 utils/augmentations.py |  8 +++-----
 utils/datasets.py      | 20 +++-----------------
 2 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/utils/augmentations.py b/utils/augmentations.py
index e59f2cc95064..0311b97b63db 100644
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@@ -122,7 +122,7 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF
 
 
 def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
-                       border=(0, 0), ar_thr=20):
+                       border=(0, 0)):
     # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
     # targets = [cls, xyxy]
 
@@ -203,9 +203,7 @@ def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, sc
             new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
 
         # filter candidates
-        i = box_candidates(
-            box1=targets[:, 1:5].T * s, box2=new.T, ar_thr=ar_thr, area_thr=0.01 if use_segments else 0.10
-        )
+        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
         targets = targets[i]
         targets[:, 1:5] = new[i]
 
@@ -271,7 +269,7 @@ def mixup(im, labels, im2, labels2):
     return im, labels
 
 
-def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
+def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
     # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
     w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
     w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
diff --git a/utils/datasets.py b/utils/datasets.py
index 3575e2a3b9db..79b871c9294b 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -389,7 +389,6 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         self.stride = stride
         self.path = path
         self.albumentations = Albumentations() if augment else None
-        self.ar_thr = 20
 
         try:
             f = []  # image files
@@ -459,16 +458,6 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
                 if segment:
                     self.segments[i][:, 0] = 0
 
-        # Dataset-specific update of ar_thr to prevent filtering of correct labels
-        labels_per_example = np.array(list(map(len, self.labels)))
-        broadcasted_shapes = np.repeat(self.shapes, repeats=labels_per_example, axis=0)  # wh
-        concatenated_labels = np.concatenate(self.labels)  # cxywh
-
-        unnormalized_widths = concatenated_labels[:, 3] * broadcasted_shapes[:, 0]
-        unnormalized_heights = concatenated_labels[:, 4] * broadcasted_shapes[:, 1]
-        dataset_max_ar = round((unnormalized_widths / unnormalized_heights).max())
-        self.ar_thr = max(dataset_max_ar, self.ar_thr)
-
         # Rectangular Training
         if self.rect:
             # Sort by aspect ratio
@@ -593,8 +582,7 @@ def __getitem__(self, index):
                                                  translate=hyp['translate'],
                                                  scale=hyp['scale'],
                                                  shear=hyp['shear'],
-                                                 perspective=hyp['perspective'],
-                                                 ar_thr=self.ar_thr)
+                                                 perspective=hyp['perspective'])
 
         nl = len(labels)  # number of labels
         if nl:
@@ -742,8 +730,7 @@ def load_mosaic(self, index):
                                        scale=self.hyp['scale'],
                                        shear=self.hyp['shear'],
                                        perspective=self.hyp['perspective'],
-                                       border=self.mosaic_border,  # border to remove
-                                       ar_thr=self.ar_thr)
+                                       border=self.mosaic_border)  # border to remove
 
     return img4, labels4
 
@@ -817,8 +804,7 @@ def load_mosaic9(self, index):
                                        scale=self.hyp['scale'],
                                        shear=self.hyp['shear'],
                                        perspective=self.hyp['perspective'],
-                                       border=self.mosaic_border,  # border to remove
-                                       ar_thr=self.ar_thr)
+                                       border=self.mosaic_border)  # border to remove
 
     return img9, labels9