Skip to content

Commit

Permalink
DDP Multi-GPU --resume bug fix (#1810)
Browse files (browse the repository at this point in the history)
  • Loading branch information
glenn-jocher committed Dec 30, 2020
1 parent 73cf75f commit 7180b22
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion train.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,9 +472,10 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
  if opt.resume: # resume an interrupted run
  ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
  assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
+ apriori = opt.global_rank, opt.local_rank
  with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
  opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace
- opt.cfg, opt.weights, opt.resume = '', ckpt, True
+ opt.cfg, opt.weights, opt.resume, opt.global_rank, opt.local_rank = '', ckpt, True, *apriori # reinstate
  logger.info('Resuming training from %s' % ckpt)
  else:
  # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
Expand Down

0 comments on commit 7180b22

Please sign in to comment.