
No warm up + Moved dropout back ultralytics#15
Also changed constants to hyperparameters
manole-alexandru committed Apr 12, 2023
1 parent 8ead728 commit 0ad7de0
Showing 9 changed files with 23 additions and 17 deletions.
5 changes: 3 additions & 2 deletions data/hyps/hyp.Objects365.yaml

@@ -7,11 +7,12 @@ lr0: 0.00258
 lrf: 0.17
 momentum: 0.779
 weight_decay: 0.00058
-warmup_epochs: 1.33
+warmup_epochs: 0
 warmup_momentum: 0.86
 warmup_bias_lr: 0.0711
 box: 0.0539
-seg: 0.1 # seg loss
+seg: 1 # Weight for segmentation loss
+det: 1 # Weights all detection losses at the same time (instead of having to change all 3 values)
 cls: 0.299
 cls_pw: 0.825
 obj: 0.632
5 changes: 3 additions & 2 deletions data/hyps/hyp.VOC.yaml

@@ -13,11 +13,12 @@ lr0: 0.00334
 lrf: 0.15135
 momentum: 0.74832
 weight_decay: 0.00025
-warmup_epochs: 3.3835
+warmup_epochs: 0
 warmup_momentum: 0.59462
 warmup_bias_lr: 0.18657
 box: 0.02
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Weights all detection losses at the same time (instead of having to change all 3 values)
 cls: 0.21638
 cls_pw: 0.5
 obj: 0.51728
5 changes: 3 additions & 2 deletions data/hyps/hyp.no-augmentation.yaml

@@ -7,11 +7,12 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937 # SGD momentum/Adam beta1
 weight_decay: 0.0005 # optimizer weight decay 5e-4
-warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_epochs: 0 # warmup epochs (fractions ok)
 warmup_momentum: 0.8 # warmup initial momentum
 warmup_bias_lr: 0.1 # warmup initial bias lr
 box: 0.05 # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Weights all detection losses at the same time (instead of having to change all 3 values)
 cls: 0.3 # cls loss gain
 cls_pw: 1.0 # cls BCELoss positive_weight
 obj: 0.7 # obj loss gain (scale with pixels)
5 changes: 3 additions & 2 deletions data/hyps/hyp.scratch-high.yaml

@@ -7,11 +7,12 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937 # SGD momentum/Adam beta1
 weight_decay: 0.0005 # optimizer weight decay 5e-4
-warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_epochs: 0 # warmup epochs (fractions ok)
 warmup_momentum: 0.8 # warmup initial momentum
 warmup_bias_lr: 0.1 # warmup initial bias lr
 box: 0.05 # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Weights all detection losses at the same time (instead of having to change all 3 values)
 cls: 0.3 # cls loss gain
 cls_pw: 1.0 # cls BCELoss positive_weight
 obj: 0.7 # obj loss gain (scale with pixels)
5 changes: 3 additions & 2 deletions data/hyps/hyp.scratch-low.yaml

@@ -7,11 +7,12 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937 # SGD momentum/Adam beta1
 weight_decay: 0.0005 # optimizer weight decay 5e-4
-warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_epochs: 0 # warmup epochs (fractions ok)
 warmup_momentum: 0.8 # warmup initial momentum
 warmup_bias_lr: 0.1 # warmup initial bias lr
 box: 0.05 # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Weights all detection losses at the same time (instead of having to change all 3 values)
 cls: 0.5 # cls loss gain
 cls_pw: 1.0 # cls BCELoss positive_weight
 obj: 1.0 # obj loss gain (scale with pixels)
5 changes: 3 additions & 2 deletions data/hyps/hyp.scratch-med.yaml

@@ -7,11 +7,12 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
 momentum: 0.937 # SGD momentum/Adam beta1
 weight_decay: 0.0005 # optimizer weight decay 5e-4
-warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_epochs: 0 # warmup epochs (fractions ok)
 warmup_momentum: 0.8 # warmup initial momentum
 warmup_bias_lr: 0.1 # warmup initial bias lr
 box: 0.05 # box loss gain
-seg: 0.1 # seg loss
+seg: 1 # seg loss
+det: 1 # Weights all detection losses at the same time (instead of having to change all 3 values)
 cls: 0.3 # cls loss gain
 cls_pw: 1.0 # cls BCELoss positive_weight
 obj: 0.7 # obj loss gain (scale with pixels)
4 changes: 1 addition & 3 deletions models/common.py

@@ -863,18 +863,16 @@ def __init__(self, in_channels):

     def forward(self, x):
         # print('----entry shape', x.shape, '---\n')
-        # x = self.dropout_weak(x)
         x = self.cv1(x)
         x = self.upsample(x)
         # x = self.relu(x)
         # print('----upsample shape', x.shape, '---\n')
-        # x = self.dropout_normal(x)
         x = self.cv2(x)
         x = self.upsample(x)

         # x = self.relu(x)
-        x = self.cv3(x)
         x = self.dropout_normal(x)
+        x = self.cv3(x)
         # print('----out shape', x.shape, '---\n')
         # x = self.sigmoid(x)
         return x
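
Read together with the removed commented-out lines, the change moves dropout from after the final convolution to immediately before it, which is the "moved dropout back" in the commit title. A hedged sketch of the reordered forward pass; the attribute names come from the diff, but the layer types, channel sizes, and dropout rate are assumptions:

    import torch.nn as nn

    class SegHeadSketch(nn.Module):
        # Hypothetical stand-in for the modified head in models/common.py;
        # channel sizes and dropout rate are illustrative. Only the layer
        # order (dropout before cv3) reflects the commit.
        def __init__(self, in_channels):
            super().__init__()
            self.cv1 = nn.Conv2d(in_channels, 128, 3, padding=1)
            self.cv2 = nn.Conv2d(128, 64, 3, padding=1)
            self.cv3 = nn.Conv2d(64, 1, 1)
            self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
            self.dropout_normal = nn.Dropout(p=0.5)

        def forward(self, x):
            x = self.upsample(self.cv1(x))
            x = self.upsample(self.cv2(x))
            x = self.dropout_normal(x)  # dropout now precedes the final conv
            return self.cv3(x)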
4 changes: 3 additions & 1 deletion train.py

@@ -241,7 +241,9 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
     hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
     hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers

-    hyp['seg'] = 1
+    hyp['box'] *= hyp['det']
+    hyp['cls'] *= hyp['det']
+    hyp['obj'] *= hyp['det']

     hyp['label_smoothing'] = opt.label_smoothing
     model.nc = nc  # attach number of classes to model
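
With the hardcoded hyp['seg'] = 1 removed, seg now comes straight from the YAML file, and det acts as a single knob that rescales every detection gain at once. A small self-contained illustration of the effect; the gains are taken from hyp.scratch-low.yaml, except det, which is set to 0.5 here purely to show the scaling:

    # One multiplier adjusts every detection gain at once, instead of
    # editing box/cls/obj by hand in three places.
    hyp = {"box": 0.05, "cls": 0.5, "obj": 1.0, "seg": 1, "det": 0.5}

    for k in ("box", "cls", "obj"):
        hyp[k] *= hyp["det"]

    print(hyp["box"], hyp["cls"], hyp["obj"])  # 0.025 0.25 0.5 (all halved)
    print(hyp["seg"])                          # 1 (segmentation gain untouched)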
2 changes: 1 addition & 1 deletion utils/loss.py

@@ -204,7 +204,7 @@ def __call__(self, preds, targets, seg_masks):  # predictions, targets

         # return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
         # return total_loss, torch.cat((lbox, lobj, lcls, lseg)).detach()
-        return (lbox + lobj + lcls) * bs * 0, lseg * bs * 1, torch.cat((lbox, lobj, lcls, lseg)).detach()
+        return (lbox + lobj + lcls) * bs, lseg * bs, torch.cat((lbox, lobj, lcls, lseg)).detach()

     def build_targets(self, p, targets):
         # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
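
The replaced return multiplied the detection term by 0 (and the segmentation term by a redundant * 1), so only the segmentation loss was actually training; this commit restores both terms at full weight, returned separately so the caller can combine them. A minimal self-contained mirror of the new return shape; the tensor values and batch size are made up:

    import torch

    def combine_losses(lbox, lobj, lcls, lseg, bs):
        # Mirrors the updated return in utils/loss.py: detection and
        # segmentation losses come back separately, each scaled by batch
        # size, plus detached per-component values for logging.
        det_loss = (lbox + lobj + lcls) * bs
        seg_loss = lseg * bs
        items = torch.cat((lbox, lobj, lcls, lseg)).detach()
        return det_loss, seg_loss, items

    det_loss, seg_loss, items = combine_losses(
        torch.tensor([0.05]), torch.tensor([0.7]),
        torch.tensor([0.3]), torch.tensor([0.4]), bs=16)
    total = det_loss + seg_loss  # a caller would backprop on this sum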
