diff --git a/train.py b/train.py
index e1393213bb4b..dd5eeb600a76 100644
--- a/train.py
+++ b/train.py
@@ -294,7 +294,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
     scheduler.last_epoch = start_epoch - 1  # do not move
     scaler = torch.cuda.amp.GradScaler(enabled=amp)
-    stopper = EarlyStopping(patience=opt.patience)
+    stopper, stop = EarlyStopping(patience=opt.patience), False
     compute_loss = ComputeLoss(model)  # init loss class
     callbacks.run('on_train_start')
     LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
@@ -402,6 +402,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
 
             # Update best mAP
             fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
             if fi > best_fitness:
                 best_fitness = fi
             log_vals = list(mloss) + list(results) + lr
@@ -428,19 +429,14 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                 del ckpt
                 callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
 
-            # Stop Single-GPU
-            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
-                break
-
-            # Stop DDP TODO: known issues shttps://github.com/ultralytics/yolov5/pull/4576
-            # stop = stopper(epoch=epoch, fitness=fi)
-            # if RANK == 0:
-            #    dist.broadcast_object_list([stop], 0)  # broadcast 'stop' to all ranks
-
-            # Stop DPP
-            # with torch_distributed_zero_first(RANK):
-            #    if stop:
-            #       break  # must break all DDP ranks
+        # EarlyStopping
+        if RANK != -1:  # if DDP training
+            broadcast_list = [stop if RANK == 0 else None]
+            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
+            if RANK != 0:
+                stop = broadcast_list[0]
+        if stop:
+            break  # must break all DDP ranks
 
         # end epoch ----------------------------------------------------------------------------------------------------
     # end training -----------------------------------------------------------------------------------------------------
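
For reference, the broadcast pattern added above can be exercised in isolation: rank 0 is the only process that evaluates the stopper, and `dist.broadcast_object_list` pushes its decision to every other rank so all processes leave the epoch loop on the same iteration. Without the broadcast, non-zero ranks would continue into the next epoch and hang at the first collective op after rank 0 had exited. Below is a minimal, self-contained sketch of that pattern; the 2-process world size, gloo backend, port, and the fake stop condition are illustrative assumptions, not part of this PR.

```python
import os

import torch.distributed as dist
import torch.multiprocessing as mp


def worker(rank, world_size):
    # Minimal single-machine process group (CPU/gloo) for demonstration only
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group('gloo', rank=rank, world_size=world_size)

    stop = False
    for epoch in range(100):
        # ... per-rank training step would run here ...
        if rank == 0:
            stop = epoch >= 3  # stand-in for stopper(epoch=epoch, fitness=fi)

        # Same shape as the PR: rank 0 decides, every other rank receives
        broadcast_list = [stop if rank == 0 else None]
        dist.broadcast_object_list(broadcast_list, src=0)  # blocks until all ranks participate
        if rank != 0:
            stop = broadcast_list[0]
        if stop:
            break  # every rank breaks on the same epoch

    print(f'rank {rank} stopped after epoch {epoch}')
    dist.destroy_process_group()


if __name__ == '__main__':
    mp.spawn(worker, args=(2,), nprocs=2)
```

Note that `broadcast_object_list` is itself a collective, so every rank must call it each epoch; this is why the check runs unconditionally inside the loop rather than only when rank 0 wants to stop.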