From d2d732bfe0acc9d64f8a2d4fff171a6b792cc11b Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sat, 6 Aug 2022 11:30:54 +0200
Subject: [PATCH] `smart_optimizer()` improved reporting (#8887)

Update smart_optimizer() weight_decay reporting
---
 train.py             | 1 -
 utils/torch_utils.py | 8 ++++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index 380113c574d6..cd24c8d2c8d5 100644
--- a/train.py
+++ b/train.py
@@ -152,7 +152,6 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     nbs = 64  # nominal batch size
     accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
     hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
-    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
     optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])
 
     # Scheduler
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index d5615c263e43..073d9d1c7b3d 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -276,7 +276,7 @@ def copy_attr(a, b, include=(), exclude=()):
             setattr(a, k, v)
 
 
-def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, weight_decay=1e-5):
+def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5):
     # YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay
     g = [], [], []  # optimizer parameter groups
     bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
@@ -299,10 +299,10 @@ def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, weight_decay=1e-
     else:
         raise NotImplementedError(f'Optimizer {name} not implemented.')
 
-    optimizer.add_param_group({'params': g[0], 'weight_decay': weight_decay})  # add g0 with weight_decay
+    optimizer.add_param_group({'params': g[0], 'weight_decay': decay})  # add g0 with weight_decay
     optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})  # add g1 (BatchNorm2d weights)
-    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
-                f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
+    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups "
+                f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias")
     return optimizer
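
For reference, a minimal sketch of how the renamed parameter is used after this patch. The toy model and hyperparameter values below are illustrative placeholders, not part of the change; in train.py the decay value is first scaled by batch_size * accumulate / nbs before the call.

    import torch.nn as nn
    from utils.torch_utils import smart_optimizer  # function updated by this patch

    # Toy stand-in for a YOLOv5 model so all three parameter groups are non-empty
    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU())

    # Same call pattern as in train.py above; 'decay' replaces the old 'weight_decay' keyword
    optimizer = smart_optimizer(model, 'SGD', lr=0.01, momentum=0.937, decay=0.0005)

With this change the single LOGGER.info call inside smart_optimizer() now reports the learning rate and the per-group decay values, which is why the separate "Scaled weight_decay" message could be dropped from train.py.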