From 360d360fc5bc4061ba71a6b0911bbf0f71a01f2f Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 16 Sep 2020 10:39:03 +0200 Subject: [PATCH 1/9] comment --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index bbb69c7e2b53..68c77bbf7b87 100644 --- a/train.py +++ b/train.py @@ -393,7 +393,7 @@ def train(hyp, opt, device, tb_writer=None): parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') - parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') + parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name}.txt if supplied') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') From 9f63d845c79f9e39946dbae4c38f7b4ff1c4637a Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 16 Sep 2020 11:57:14 +0200 Subject: [PATCH 2/9] fix parsing --- utils/general.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/utils/general.py b/utils/general.py index 2530b10efb11..f58f7850be73 100755 --- a/utils/general.py +++ b/utils/general.py @@ -7,6 +7,7 @@ import shutil import subprocess import time +import re from contextlib import contextmanager from copy import copy from pathlib import Path @@ -952,9 +953,12 @@ def increment_dir(dir, comment=''): # Increments a directory runs/exp1 --> runs/exp2_comment n = 0 # number dir = str(Path(dir)) # os-agnostic - d = sorted(glob.glob(dir + '*')) # directories - if len(d): - n = max([int(x[len(dir):x.rfind('_') if '_' in Path(x).name else None]) for x in d]) + 1 # increment + dirs = sorted(glob.glob(dir + '*')) # directories + if dirs: + matches = [re.search(r"exp(\d+)", d) for d in dirs] + idxs = [int(m.groups()[0]) for m in matches if m] + if idxs: + n = max(idxs) + 1 # increment return dir + str(n) + ('_' + comment if comment else '') From 5e9a72bfebc1a8acbe139b943a30f31714b54f22 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 25 Sep 2020 12:03:07 +0200 Subject: [PATCH 3/9] fix evolve --- train.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/train.py b/train.py index 68c77bbf7b87..0968f7ea8ff0 100644 --- a/train.py +++ b/train.py @@ -6,6 +6,7 @@ import shutil import time from pathlib import Path +from pprint import pprint import numpy as np import torch.distributed as dist @@ -207,7 +208,8 @@ def train(hyp, opt, device, tb_writer=None): results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) - logger.info('Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n' + logger.info('Image sizes %g train, %g test\n' + 'Using %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ model.train() @@ -440,7 +442,7 @@ def train(hyp, opt, device, tb_writer=None): assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' opt.batch_size = opt.total_batch_size // opt.world_size - logger.info(opt) + pprint(vars(opt)) with open(opt.hyp) as f: hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps @@ -448,7 +450,7 @@ def train(hyp, opt, device, tb_writer=None): if not opt.evolve: tb_writer = None if opt.global_rank in [-1, 0]: - logger.info('Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/' % opt.logdir) + logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/') tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0 train(hyp, opt, device, tb_writer) @@ -470,7 +472,8 @@ def train(hyp, opt, device, tb_writer=None): 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 'iou_t': (0, 0.1, 0.7), # IoU training threshold 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold - 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) + # temp fix for https://github.com/ultralytics/yolov5/issues/607#issuecomment-692589883 + # 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) @@ -532,5 +535,5 @@ def train(hyp, opt, device, tb_writer=None): # Plot results plot_evolution(yaml_file) - print('Hyperparameter evolution complete. Best results saved as: %s\nCommand to train a new model with these ' - 'hyperparameters: $ python train.py --hyp %s' % (yaml_file, yaml_file)) + print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n' + f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}') From 27ddecd9076846c0baadd01eac8ab8bde6700bb9 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 25 Sep 2020 22:25:42 +0200 Subject: [PATCH 4/9] folder --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 0968f7ea8ff0..8bb5dfda1ab8 100644 --- a/train.py +++ b/train.py @@ -491,7 +491,7 @@ def train(hyp, opt, device, tb_writer=None): assert opt.local_rank == -1, 'DDP mode not implemented for --evolve' opt.notest, opt.nosave = True, True # only test/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices - yaml_file = Path('runs/evolve/hyp_evolved.yaml') # save best result here + yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml' # save best result here if opt.bucket: os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists From 4cebfc8c44fa0771c062bcec57e03331f8838bc1 Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 29 Sep 2020 09:08:50 +0200 Subject: [PATCH 5/9] tqdm --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 8bb5dfda1ab8..563916529ace 100644 --- a/train.py +++ b/train.py @@ -495,7 +495,7 @@ def train(hyp, opt, device, tb_writer=None): if opt.bucket: os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists - for _ in range(300): # generations to evolve + for _ in tqdm(range(300), desc='perform evolve >>'): # generations to evolve if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' From e10f1440ecfa180227a1d94a190d9a5737876cdf Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 12 Oct 2020 11:03:10 +0200 Subject: [PATCH 6/9] Update train.py --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 563916529ace..8b3acd919e05 100644 --- a/train.py +++ b/train.py @@ -395,7 +395,7 @@ def train(hyp, opt, device, tb_writer=None): parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') - parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name}.txt if supplied') + parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name} if supplied') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') From 6f56e4aad707581e4ed7b4fa213861d72ac1ac33 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 12 Oct 2020 11:37:47 +0200 Subject: [PATCH 7/9] Update train.py --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 8b3acd919e05..366e237c9de1 100644 --- a/train.py +++ b/train.py @@ -495,7 +495,7 @@ def train(hyp, opt, device, tb_writer=None): if opt.bucket: os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists - for _ in tqdm(range(300), desc='perform evolve >>'): # generations to evolve + for _ in range(300): # generations to evolve if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' From aabe4f7720eb9f2a024d24be474938776f43f979 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 12 Oct 2020 11:38:45 +0200 Subject: [PATCH 8/9] reinstate anchors into meta dict anchor evolution is working correctly now --- train.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/train.py b/train.py index 366e237c9de1..ffb1a3eff55c 100644 --- a/train.py +++ b/train.py @@ -472,8 +472,7 @@ def train(hyp, opt, device, tb_writer=None): 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 'iou_t': (0, 0.1, 0.7), # IoU training threshold 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold - # temp fix for https://github.com/ultralytics/yolov5/issues/607#issuecomment-692589883 - # 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) + 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) From 8a32d254511ddd4790acb42b2f10463172dc0d69 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 12 Oct 2020 11:41:18 +0200 Subject: [PATCH 9/9] reinstate logger prefer the single line readout for concise logging, which helps simplify notebook and tutorials etc. --- train.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/train.py b/train.py index ffb1a3eff55c..357c2c6434ad 100644 --- a/train.py +++ b/train.py @@ -6,7 +6,6 @@ import shutil import time from pathlib import Path -from pprint import pprint import numpy as np import torch.distributed as dist @@ -442,7 +441,7 @@ def train(hyp, opt, device, tb_writer=None): assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' opt.batch_size = opt.total_batch_size // opt.world_size - pprint(vars(opt)) + logger.info(opt) with open(opt.hyp) as f: hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps