diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml
index d62cf5c1600d..ee472297107e 100644
--- a/.github/workflows/greetings.yml
+++ b/.github/workflows/greetings.yml
@@ -11,7 +11,7 @@ jobs:
         repo-token: ${{ secrets.GITHUB_TOKEN }}
         pr-message: |
           👋 Hello @${{ github.actor }}, thank you for submitting a 🚀 PR! To allow your work to be integrated as seamlessly as possible, we advise you to:
-          - ✅ Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch:
+          - ✅ Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master an automatic [GitHub actions](https://github.com/ultralytics/yolov5/blob/master/.github/workflows/rebase.yml) rebase may be attempted by including the /rebase command in a comment body, or by running the following code, replacing 'feature' with the name of your local branch:
           ```bash
           git remote add upstream https://github.com/ultralytics/yolov5.git
           git fetch upstream
diff --git a/Dockerfile b/Dockerfile
index 98dfee204770..fe64d6da29f9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
 FROM nvcr.io/nvidia/pytorch:20.12-py3
 # Install linux packages
-RUN apt update && apt install -y screen libgl1-mesa-glx
+RUN apt update && apt install -y zip screen libgl1-mesa-glx
 # Install python dependencies
 RUN python -m pip install --upgrade pip
diff --git a/README.md b/README.md
index 3c14071698c5..b7129e80adfe 100755
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

-![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)
+<a href="https://github.com/ultralytics/yolov5/actions"><img src="https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg" alt="CI CPU testing"></a>

 This repository represents Ultralytics open-source research into future object detection methods, and incorporates lessons learned and best practices evolved over thousands of hours of training and evolution on anonymized client datasets. **All code and models are under active development, and are subject to modification or deletion without notice.** Use at your own risk.
@@ -89,17 +89,15 @@ To run inference on example images in `data/images`:
 ```bash
 $ python detect.py --source data/images --weights yolov5s.pt --conf 0.25
-Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', img_size=640, iou_thres=0.45, save_conf=False, save_dir='runs/detect', save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt'])
-Using torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130MB)
-
-Downloading https://github.com/ultralytics/yolov5/releases/download/v3.1/yolov5s.pt to yolov5s.pt... 100%|██████████████| 14.5M/14.5M [00:00<00:00, 21.3MB/s]
+Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=False, img_size=640, iou_thres=0.45, name='exp', project='runs/detect', save_conf=False, save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt'])
+YOLOv5 v4.0-96-g83dc1b4 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)
 Fusing layers...
-Model Summary: 232 layers, 7459581 parameters, 0 gradients
-image 1/2 data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.012s)
-image 2/2 data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.012s)
-Results saved to runs/detect/exp
-Done. (0.113s)
+Model Summary: 224 layers, 7266973 parameters, 0 gradients, 17.0 GFLOPS
+image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, Done.
(0.010s) +image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 1 tie, Done. (0.011s) +Results saved to runs/detect/exp2 +Done. (0.103s) ``` @@ -108,18 +106,17 @@ Done. (0.113s) To run **batched inference** with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36): ```python import torch -from PIL import Image # Model model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) # Images -img1 = Image.open('zidane.jpg') -img2 = Image.open('bus.jpg') -imgs = [img1, img2] # batched list of images +dir = 'https://github.com/ultralytics/yolov5/raw/master/data/images/' +imgs = [dir + f for f in ('zidane.jpg', 'bus.jpg')] # batched list of images # Inference -result = model(imgs) +results = model(imgs) +results.print() # or .show(), .save() ``` diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh index b0df905c8525..bbb1e9291d5b 100755 --- a/data/scripts/get_coco.sh +++ b/data/scripts/get_coco.sh @@ -10,8 +10,9 @@ # Download/unzip labels d='../' # unzip directory url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ -f='coco2017labels.zip' # 68 MB -echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove +f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB +echo 'Downloading' $url$f ' ...' +curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background # Download/unzip images d='../coco/images' # unzip directory @@ -20,7 +21,7 @@ f1='train2017.zip' # 19G, 118k images f2='val2017.zip' # 1G, 5k images f3='test2017.zip' # 7G, 41k images (optional) for f in $f1 $f2; do - echo 'Downloading' $url$f '...' && curl -L $url$f -o $f # download, (unzip, remove in background) - unzip -q $f -d $d && rm $f & + echo 'Downloading' $url$f '...' + curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background done wait # finish background tasks diff --git a/data/scripts/get_voc.sh b/data/scripts/get_voc.sh index 06414b085095..13b83c28d706 100644 --- a/data/scripts/get_voc.sh +++ b/data/scripts/get_voc.sh @@ -18,8 +18,8 @@ f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images for f in $f3 $f2 $f1; do - echo 'Downloading' $url$f '...' && curl -L $url$f -o $f # download, (unzip, remove in background) - unzip -q $f -d $d && rm $f & + echo 'Downloading' $url$f '...' 
+ curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background done wait # finish background tasks diff --git a/detect.py b/detect.py index f9085e670916..22bf21b4c825 100644 --- a/detect.py +++ b/detect.py @@ -9,8 +9,8 @@ from models.experimental import attempt_load from utils.datasets import LoadStreams, LoadImages -from utils.general import check_img_size, check_requirements, non_max_suppression, apply_classifier, scale_coords, \ - xyxy2xywh, strip_optimizer, set_logging, increment_path +from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \ + scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path from utils.plots import plot_one_box from utils.torch_utils import select_device, load_classifier, time_synchronized @@ -45,7 +45,7 @@ def detect(save_img=False): # Set Dataloader vid_path, vid_writer = None, None if webcam: - view_img = True + view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: @@ -118,6 +118,7 @@ def detect(save_img=False): # Stream results if view_img: cv2.imshow(str(p), im0) + cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: diff --git a/hubconf.py b/hubconf.py index 2a34813310e8..47eee4477725 100644 --- a/hubconf.py +++ b/hubconf.py @@ -133,9 +133,14 @@ def custom(path_or_model='path/to/model.pt', autoshape=True): # model = custom(path_or_model='path/to/model.pt') # custom example # Verify inference + import numpy as np from PIL import Image - imgs = [Image.open(x) for x in Path('data/images').glob('*.jpg')] - results = model(imgs) + imgs = [Image.open('data/images/bus.jpg'), # PIL + 'data/images/zidane.jpg', # filename + 'https://github.com/ultralytics/yolov5/raw/master/data/images/bus.jpg', # URI + np.zeros((640, 480, 3))] # numpy + + results = model(imgs) # batched inference results.print() results.save() diff --git a/models/common.py b/models/common.py index e8adb66293d5..ad35f908d865 100644 --- a/models/common.py +++ b/models/common.py @@ -1,16 +1,17 @@ # This file contains modules common to various models import math +from pathlib import Path import numpy as np import requests import torch import torch.nn as nn -from PIL import Image, ImageDraw +from PIL import Image from utils.datasets import letterbox from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh -from utils.plots import color_list +from utils.plots import color_list, plot_one_box def autopad(k, p=None): # kernel, padding @@ -195,10 +196,12 @@ def forward(self, imgs, size=640, augment=False, profile=False): # Pre-process n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images - shape0, shape1 = [], [] # image and inference shapes + shape0, shape1, files = [], [], [] # image and inference shapes, filenames for i, im in enumerate(imgs): if isinstance(im, str): # filename or uri - im = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im) # open + im, f = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im), im # open + im.filename = f # for uri + files.append(Path(im.filename).with_suffix('.jpg').name if isinstance(im, Image.Image) else f'image{i}.jpg') im = np.array(im) # to numpy if im.shape[0] < 5: # image in CHW im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) @@ -223,25 +226,26 @@ def 
forward(self, imgs, size=640, augment=False, profile=False):
         for i in range(n):
             scale_coords(shape1, y[i][:, :4], shape0[i])

-        return Detections(imgs, y, self.names)
+        return Detections(imgs, y, files, self.names)

 class Detections:
     # detections class for YOLOv5 inference results
-    def __init__(self, imgs, pred, names=None):
+    def __init__(self, imgs, pred, files, names=None):
         super(Detections, self).__init__()
         d = pred[0].device  # device
         gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
         self.imgs = imgs  # list of images as numpy arrays
         self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
         self.names = names  # class names
+        self.files = files  # image filenames
         self.xyxy = pred  # xyxy pixels
         self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
         self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
         self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
         self.n = len(self.pred)

-    def display(self, pprint=False, show=False, save=False, render=False):
+    def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
         colors = color_list()
         for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
             str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
@@ -250,16 +254,16 @@ def display(self, pprint=False, show=False, save=False, render=False):
                 n = (pred[:, -1] == c).sum()  # detections per class
                 str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
             if show or save or render:
-                img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
                 for *box, conf, cls in pred:  # xyxy, confidence, class
-                    # str += '%s %.2f, ' % (names[int(cls)], conf)  # label
-                    ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10])  # plot
+                    label = f'{self.names[int(cls)]} {conf:.2f}'
+                    plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
+                img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
             if pprint:
                 print(str.rstrip(', '))
             if show:
-                img.show(f'image {i}')  # show
+                img.show(self.files[i])  # show
             if save:
-                f = f'results{i}.jpg'
+                f = Path(save_dir) / self.files[i]
                 img.save(f)  # save
                 print(f"{'Saving' * (i == 0)} {f},", end='' if i < self.n - 1 else ' done.\n')
             if render:
@@ -271,8 +275,9 @@ def print(self):
     def show(self):
         self.display(show=True)  # show results

-    def save(self):
-        self.display(save=True)  # save results
+    def save(self, save_dir='results/'):
+        Path(save_dir).mkdir(exist_ok=True)
+        self.display(save=True, save_dir=save_dir)  # save results

     def render(self):
         self.display(render=True)  # render results
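The reworked `Detections` class above now tracks per-image filenames and accepts a `save_dir`. A minimal sketch of how the new pieces fit together, assuming the `ultralytics/yolov5` hub entrypoint, network access, and a writable `results/` directory (the image URL is taken from the repo's own examples):

```python
import torch

# load the autoshaped hub model (downloads yolov5s.pt on first use)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# filenames are now inferred per input, so URIs, file paths, PIL images and numpy arrays all work
results = model(['https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg'])

results.print()                     # per-image summary string built in display()
results.save(save_dir='results/')   # boxes drawn via plot_one_box(), saved as results/zidane.jpg
print(results.files)                # ['zidane.jpg'], derived from the input URI
print(results.xyxy[0])              # (x1, y1, x2, y2, conf, cls) tensor for image 0
```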
diff --git a/models/export.py b/models/export.py
index 057658af53dc..cc817871f218 100644
--- a/models/export.py
+++ b/models/export.py
@@ -22,6 +22,7 @@
     parser = argparse.ArgumentParser()
     parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')  # from yolov5/models/
     parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
+    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
     parser.add_argument('--batch-size', type=int, default=1, help='batch size')
     opt = parser.parse_args()
     opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
@@ -70,7 +71,9 @@
         print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
         f = opt.weights.replace('.pt', '.onnx')  # filename
         torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
-                          output_names=['classes', 'boxes'] if y is None else ['output'])
+                          output_names=['classes', 'boxes'] if y is None else ['output'],
+                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
+                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

         # Checks
         onnx_model = onnx.load(f)  # load onnx model
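With the `--dynamic` flag wired into `torch.onnx.export` above, the exported graph keeps batch, height and width symbolic instead of baked to `(1, 3, 640, 640)`. A usage sketch, run from the repo root with the weights file assumed present:

```bash
# export yolov5s.pt to ONNX with dynamic batch/height/width axes
$ python models/export.py --weights yolov5s.pt --img-size 640 640 --batch-size 1 --dynamic
```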
diff --git a/models/yolo.py b/models/yolo.py
index 11e6a65921a4..85043f2b0205 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -2,7 +2,6 @@
 import logging
 import sys
 from copy import deepcopy
-from pathlib import Path

 sys.path.append('./')  # to run '$ python *.py' files in subdirectories
 logger = logging.getLogger(__name__)
@@ -50,7 +49,7 @@ def forward(self, x):
                     self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                 y = x[i].sigmoid()
-                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
+                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                 y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 z.append(y.view(bs, -1, self.no))
@@ -110,9 +109,9 @@ def forward(self, x, augment=False, profile=False):
             # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
             yi[..., :4] /= si  # de-scale
             if fi == 2:
-                yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
+                yi[..., 1] = img_size[0] - 1 - yi[..., 1]  # de-flip ud
             elif fi == 3:
-                yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
+                yi[..., 0] = img_size[1] - 1 - yi[..., 0]  # de-flip lr
             y.append(yi)
         return torch.cat(y, 1), None  # augmented inference, train
     else:
@@ -213,43 +212,27 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
             c1, c2 = ch[f], args[0]
-
-            # Normal
-            # if i > 0 and args[0] != no:  # channel expansion factor
-            #     ex = 1.75  # exponential (default 2.0)
-            #     e = math.log(c2 / ch[1]) / math.log(2)
-            #     c2 = int(ch[1] * ex ** e)
-            # if m != Focus:
-
-            c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
-
-            # Experimental
-            # if i > 0 and args[0] != no:  # channel expansion factor
-            #     ex = 1 + gw  # exponential (default 2.0)
-            #     ch1 = 32  # ch[1]
-            #     e = math.log(c2 / ch1) / math.log(2)  # level 1-n
-            #     c2 = int(ch1 * ex ** e)
-            # if m != Focus:
-            #     c2 = make_divisible(c2, 8) if c2 != no else c2
+            if c2 != no:  # if not output
+                c2 = make_divisible(c2 * gw, 8)

             args = [c1, c2, *args[1:]]
             if m in [BottleneckCSP, C3]:
-                args.insert(2, n)
+                args.insert(2, n)  # number of repeats
                 n = 1
         elif m is nn.BatchNorm2d:
             args = [ch[f]]
         elif m is Concat:
-            c2 = sum([ch[x if x < 0 else x + 1] for x in f])
+            c2 = sum([ch[x] for x in f])
         elif m is Detect:
-            args.append([ch[x + 1] for x in f])
+            args.append([ch[x] for x in f])
             if isinstance(args[1], int):  # number of anchors
                 args[1] = [list(range(args[1] * 2))] * len(f)
         elif m is Contract:
-            c2 = ch[f if f < 0 else f + 1] * args[0] ** 2
+            c2 = ch[f] * args[0] ** 2
         elif m is Expand:
-            c2 = ch[f if f < 0 else f + 1] // args[0] ** 2
+            c2 = ch[f] // args[0] ** 2
         else:
-            c2 = ch[f if f < 0 else f + 1]
+            c2 = ch[f]

         m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
         t = str(m)[8:-2].replace('__main__.', '')  # module type
@@ -258,6 +241,8 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args))  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
+        if i == 0:
+            ch = []
         ch.append(c2)
     return nn.Sequential(*layers), sorted(save)
diff --git a/requirements.txt b/requirements.txt
index d22b42f5d786..cb50cf8f32e1 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,8 +21,8 @@ seaborn>=0.11.0
 pandas

 # export --------------------------------------
-# coremltools==4.0
-# onnx>=1.8.0
+# coremltools>=4.1
+# onnx>=1.8.1
 # scikit-learn==0.19.2  # for coreml quantization

 # extras --------------------------------------
diff --git a/test.py b/test.py
index 738764f15601..91176eca01db 100644
--- a/test.py
+++ b/test.py
@@ -52,7 +52,8 @@ def test(data,
     # Load model
     model = attempt_load(weights, map_location=device)  # load FP32 model
-    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
+    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+    imgsz = check_img_size(imgsz, s=gs)  # check img_size

     # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
     # if device.type != 'cpu' and torch.cuda.device_count() > 1:
@@ -85,7 +86,7 @@ def test(data,
         if device.type != 'cpu':
             model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
         path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
-        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True,
+        dataloader = create_dataloader(path, imgsz, batch_size, gs, opt, pad=0.5, rect=True,
                                        prefix=colorstr('test: ' if opt.task == 'test' else 'val: '))[0]

     seen = 0
@@ -106,7 +107,7 @@ def test(data,
         with torch.no_grad():
             # Run model
             t = time_synchronized()
-            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
+            out, train_out = model(img, augment=augment)  # inference and training outputs
             t0 += time_synchronized() - t

             # Compute loss
@@ -117,11 +118,11 @@ def test(data,
             targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
             lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
             t = time_synchronized()
-            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
+            out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True)
             t1 += time_synchronized() - t

         # Statistics per image
-        for si, pred in enumerate(output):
+        for si, pred in enumerate(out):
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
             tcls = labels[:, 0].tolist() if nl else []  # target class
@@ -209,7 +210,7 @@ def test(data,
             f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
             Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
             f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
-            Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()

     # Compute statistics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
@@ -268,10 +269,10 @@ def test(data,
             print(f'pycocotools unable to run: {e}')

     # Return results
+    model.float()  # for training
     if not training:
         s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
         print(f"Results saved to {save_dir}{s}")
-        model.float()  # for training
     maps = np.zeros(nc) + map
     for i, c in
enumerate(ap_class): maps[c] = ap[i] diff --git a/train.py b/train.py index 4ec97ae71e16..bbf879f3af5f 100644 --- a/train.py +++ b/train.py @@ -4,6 +4,7 @@ import os import random import time +from copy import deepcopy from pathlib import Path from threading import Thread @@ -31,7 +32,7 @@ from utils.google_utils import attempt_download from utils.loss import ComputeLoss from utils.plots import plot_images, plot_labels, plot_results, plot_evolution -from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first +from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, is_parallel logger = logging.getLogger(__name__) @@ -120,7 +121,10 @@ def train(hyp, opt, device, tb_writer=None, wandb=None): # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR - lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] + if opt.linear_lr: + lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + else: + lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) @@ -130,9 +134,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None): wandb_run = wandb.init(config=opt, resume="allow", project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, name=save_dir.stem, + entity=opt.entity, id=ckpt.get('wandb_id') if 'ckpt' in locals() else None) loggers = {'wandb': wandb} # loggers dict + # EMA + ema = ModelEMA(model) if rank in [-1, 0] else None + # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: @@ -141,10 +149,14 @@ def train(hyp, opt, device, tb_writer=None, wandb=None): optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] + # EMA + if ema and ckpt.get('ema'): + ema.ema.load_state_dict(ckpt['ema'][0].float().state_dict()) + ema.updates = ckpt['ema'][1] + # Results if ckpt.get('training_results') is not None: - with open(results_file, 'w') as file: - file.write(ckpt['training_results']) # write results.txt + results_file.write_text(ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 @@ -158,7 +170,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None): del ckpt, state_dict # Image sizes - gs = int(model.stride.max()) # grid size (max stride) + gs = max(int(model.stride.max()), 32) # grid size (max stride) nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples @@ -171,9 +183,6 @@ def train(hyp, opt, device, tb_writer=None, wandb=None): model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') - # EMA - ema = ModelEMA(model) if rank in [-1, 0] else None - # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) @@ -189,8 +198,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None): # Process 0 if rank in [-1, 0]: - ema.updates = start_epoch * nb // accumulate # set EMA updates - testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, # testloader + testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt, # testloader hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers, 
pad=0.5, prefix=colorstr('val: '))[0]
@@ -333,12 +341,11 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         # DDP process 0 or single-GPU
         if rank in [-1, 0]:
             # mAP
-            if ema:
-                ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
+            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
             final_epoch = epoch + 1 == epochs
             if not opt.notest or final_epoch:  # Calculate mAP
                 results, maps, times = test.test(opt.data,
-                                                 batch_size=total_batch_size,
+                                                 batch_size=batch_size * 2,
                                                  imgsz=imgsz_test,
                                                  model=ema.ema,
                                                  single_cls=opt.single_cls,
@@ -351,7 +358,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
             # Write
             with open(results_file, 'a') as f:
-                f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+                f.write(s + '%10.4g' * 7 % results + '\n')  # append metrics, val_loss
             if len(opt.name) and opt.bucket:
                 os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
@@ -372,30 +379,30 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                 best_fitness = fi

             # Save model
-            save = (not opt.nosave) or (final_epoch and not opt.evolve)
-            if save:
-                with open(results_file, 'r') as f:  # create checkpoint
-                    ckpt = {'epoch': epoch,
-                            'best_fitness': best_fitness,
-                            'training_results': f.read(),
-                            'model': ema.ema,
-                            'optimizer': None if final_epoch else optimizer.state_dict(),
-                            'wandb_id': wandb_run.id if wandb else None}
+            if (not opt.nosave) or (final_epoch and not opt.evolve):  # if save
+                ckpt = {'epoch': epoch,
+                        'best_fitness': best_fitness,
+                        'training_results': results_file.read_text(),
+                        'model': deepcopy(model.module if is_parallel(model) else model).half(),
+                        'ema': (deepcopy(ema.ema).half(), ema.updates),
+                        'optimizer': optimizer.state_dict(),
+                        'wandb_id': wandb_run.id if wandb else None}

                 # Save last, best and delete
                 torch.save(ckpt, last)
                 if best_fitness == fi:
                     torch.save(ckpt, best)
                 del ckpt
+        # end epoch ----------------------------------------------------------------------------------------------------

     # end training
     if rank in [-1, 0]:
         # Strip optimizers
         final = best if best.exists() else last  # final model
-        for f in [last, best]:
+        for f in last, best:
             if f.exists():
-                strip_optimizer(f)  # strip optimizers
+                strip_optimizer(f)
         if opt.bucket:
             os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload
@@ -412,17 +419,17 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         # Test best.pt
         logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
         if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
-            for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]):  # speed, mAP tests
+            for m in (last, best) if best.exists() else (last,):  # speed, mAP tests
                 results, _, _ = test.test(opt.data,
-                                          batch_size=total_batch_size,
+                                          batch_size=batch_size * 2,
                                           imgsz=imgsz_test,
-                                          conf_thres=conf,
-                                          iou_thres=iou,
-                                          model=attempt_load(final, device).half(),
+                                          conf_thres=0.001,
+                                          iou_thres=0.7,
+                                          model=attempt_load(m, device).half(),
                                           single_cls=opt.single_cls,
                                           dataloader=testloader,
                                           save_dir=save_dir,
-                                          save_json=save_json,
+                                          save_json=True,
                                           plots=False)

     else:
@@ -461,9 +468,11 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     parser.add_argument('--log-artifacts', action='store_true', help='log artifacts, i.e.
final trained model') parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') parser.add_argument('--project', default='runs/train', help='save to project/name') + parser.add_argument('--entity', default=None, help='W&B entity') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') + parser.add_argument('--linear-lr', action='store_true', help='linear LR') opt = parser.parse_args() # Set DDP variables diff --git a/tutorial.ipynb b/tutorial.ipynb index 3f7133f4f7d7..7fce40c3824e 100644 --- a/tutorial.ipynb +++ b/tutorial.ipynb @@ -16,7 +16,7 @@ "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { - "811fd52fef65422c8267bafcde8a2c3d": { + "1f8e9b8ebded4175b2eaa9f75c3ceb00": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { @@ -28,15 +28,15 @@ "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", - "layout": "IPY_MODEL_8f41b90117224eef9133a9c3a103dbba", + "layout": "IPY_MODEL_0a1246a73077468ab80e979cc0576cd2", "_model_module": "@jupyter-widgets/controls", "children": [ - "IPY_MODEL_ca2fb37af6ed43d4a74cdc9f2ac5c4a5", - "IPY_MODEL_29419ae5ebb9403ea73f7e5a68037bdd" + "IPY_MODEL_d327cde5a85a4a51bb8b1b3e9cf06c97", + "IPY_MODEL_d5ef1cb2cbed4b87b3c5d292ff2b0da6" ] } }, - "8f41b90117224eef9133a9c3a103dbba": { + "0a1246a73077468ab80e979cc0576cd2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { @@ -87,12 +87,12 @@ "left": null } }, - "ca2fb37af6ed43d4a74cdc9f2ac5c4a5": { + "d327cde5a85a4a51bb8b1b3e9cf06c97": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", - "style": "IPY_MODEL_6511b4dfb10b48d1bc98bcfb3987bfa0", + "style": "IPY_MODEL_8d5dff8bca14435a88fa1814533acd85", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", @@ -107,30 +107,30 @@ "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_64f0badf1a8f489885aa984dd62d37dc" + "layout": "IPY_MODEL_3d5136c19e7645ca9bc8f51ceffb2be1" } }, - "29419ae5ebb9403ea73f7e5a68037bdd": { + "d5ef1cb2cbed4b87b3c5d292ff2b0da6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", - "style": "IPY_MODEL_f569911c5cfc4d81bb1bdfa83447afc8", + "style": "IPY_MODEL_2919396dbd4b4c8e821d12bd28665d8a", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "value": " 781M/781M [00:23<00:00, 34.2MB/s]", + "value": " 781M/781M [00:12<00:00, 65.5MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_84943ade566440aaa2dcf3b3b27e7074" + "layout": "IPY_MODEL_6feb16f2b2fa4021b1a271e1dd442d04" } }, - "6511b4dfb10b48d1bc98bcfb3987bfa0": { + "8d5dff8bca14435a88fa1814533acd85": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { @@ -145,7 +145,7 @@ "_model_module": "@jupyter-widgets/controls" } }, - "64f0badf1a8f489885aa984dd62d37dc": { + "3d5136c19e7645ca9bc8f51ceffb2be1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { @@ -196,7 
+196,7 @@ "left": null } }, - "f569911c5cfc4d81bb1bdfa83447afc8": { + "2919396dbd4b4c8e821d12bd28665d8a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { @@ -210,7 +210,7 @@ "_model_module": "@jupyter-widgets/controls" } }, - "84943ade566440aaa2dcf3b3b27e7074": { + "6feb16f2b2fa4021b1a271e1dd442d04": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { @@ -261,7 +261,7 @@ "left": null } }, - "8501ed1563e4452eac9df6b7a66e8f8c": { + "e6459e0bcee449b090fc9807672725bc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { @@ -273,15 +273,15 @@ "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", - "layout": "IPY_MODEL_d2bb96801e1f46f4a58e02534f7026ff", + "layout": "IPY_MODEL_c341e1d3bf3b40d1821ce392eb966c68", "_model_module": "@jupyter-widgets/controls", "children": [ - "IPY_MODEL_468a796ef06b4a24bcba6fbd4a0a8db5", - "IPY_MODEL_42ad5c1ea7be4835bffebf90642178f1" + "IPY_MODEL_660afee173694231a6dce3cd94df6cae", + "IPY_MODEL_261218485cef48df961519dde5edfcbe" ] } }, - "d2bb96801e1f46f4a58e02534f7026ff": { + "c341e1d3bf3b40d1821ce392eb966c68": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { @@ -332,12 +332,12 @@ "left": null } }, - "468a796ef06b4a24bcba6fbd4a0a8db5": { + "660afee173694231a6dce3cd94df6cae": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", - "style": "IPY_MODEL_c58b5536d98f4814831934e9c30c4d78", + "style": "IPY_MODEL_32736d503c06497abfae8c0421918255", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", @@ -352,30 +352,30 @@ "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_505597101151486ea29e9ab754544d27" + "layout": "IPY_MODEL_e257738711f54d5280c8393d9d3dce1c" } }, - "42ad5c1ea7be4835bffebf90642178f1": { + "261218485cef48df961519dde5edfcbe": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", - "style": "IPY_MODEL_de6e7b4b4a1c408c9f89d89b07a13bcd", + "style": "IPY_MODEL_beb7a6fe34b840899bb79c062681696f", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "value": " 21.1M/21.1M [00:01<00:00, 18.2MB/s]", + "value": " 21.1M/21.1M [00:00<00:00, 33.5MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_f5cc9c7d4c274b2d81327ba3163c43fd" + "layout": "IPY_MODEL_e639132395d64d70b99d8b72c32f8fbb" } }, - "c58b5536d98f4814831934e9c30c4d78": { + "32736d503c06497abfae8c0421918255": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { @@ -390,7 +390,7 @@ "_model_module": "@jupyter-widgets/controls" } }, - "505597101151486ea29e9ab754544d27": { + "e257738711f54d5280c8393d9d3dce1c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { @@ -441,7 +441,7 @@ "left": null } }, - "de6e7b4b4a1c408c9f89d89b07a13bcd": { + "beb7a6fe34b840899bb79c062681696f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { @@ -455,7 +455,7 @@ "_model_module": "@jupyter-widgets/controls" } }, - "f5cc9c7d4c274b2d81327ba3163c43fd": { + "e639132395d64d70b99d8b72c32f8fbb": { "model_module": "@jupyter-widgets/base", 
"model_name": "LayoutModel", "state": { @@ -550,7 +550,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "c6ad57c2-40b7-4764-b07d-19ee2ceaabaf" + "outputId": "ae8805a9-ce15-4e1c-f6b4-baa1c1033f56" }, "source": [ "!git clone https://github.com/ultralytics/yolov5 # clone repo\n", @@ -568,7 +568,7 @@ { "output_type": "stream", "text": [ - "Setup complete. Using torch 1.7.0+cu101 _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16130MB, multi_processor_count=80)\n" + "Setup complete. Using torch 1.7.0+cu101 _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16160MB, multi_processor_count=80)\n" ], "name": "stdout" } @@ -672,17 +672,17 @@ "base_uri": "https://localhost:8080/", "height": 65, "referenced_widgets": [ - "811fd52fef65422c8267bafcde8a2c3d", - "8f41b90117224eef9133a9c3a103dbba", - "ca2fb37af6ed43d4a74cdc9f2ac5c4a5", - "29419ae5ebb9403ea73f7e5a68037bdd", - "6511b4dfb10b48d1bc98bcfb3987bfa0", - "64f0badf1a8f489885aa984dd62d37dc", - "f569911c5cfc4d81bb1bdfa83447afc8", - "84943ade566440aaa2dcf3b3b27e7074" + "1f8e9b8ebded4175b2eaa9f75c3ceb00", + "0a1246a73077468ab80e979cc0576cd2", + "d327cde5a85a4a51bb8b1b3e9cf06c97", + "d5ef1cb2cbed4b87b3c5d292ff2b0da6", + "8d5dff8bca14435a88fa1814533acd85", + "3d5136c19e7645ca9bc8f51ceffb2be1", + "2919396dbd4b4c8e821d12bd28665d8a", + "6feb16f2b2fa4021b1a271e1dd442d04" ] }, - "outputId": "59a7a546-8492-492e-861d-70a2c85a6794" + "outputId": "d6ace7c6-1be5-41ff-d607-1c716b88d298" }, "source": [ "# Download COCO val2017\n", @@ -695,7 +695,7 @@ "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "811fd52fef65422c8267bafcde8a2c3d", + "model_id": "1f8e9b8ebded4175b2eaa9f75c3ceb00", "version_minor": 0, "version_major": 2 }, @@ -723,7 +723,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "427c211e-e283-4e87-f7b3-7b8dfb11a4a5" + "outputId": "cc25f70c-0a11-44f6-cc44-e92c5083488c" }, "source": [ "# Run YOLOv5x on COCO val2017\n", @@ -735,34 +735,33 @@ "output_type": "stream", "text": [ "Namespace(augment=False, batch_size=32, conf_thres=0.001, data='./data/coco.yaml', device='', exist_ok=False, img_size=640, iou_thres=0.65, name='exp', project='runs/test', save_conf=False, save_hybrid=False, save_json=True, save_txt=False, single_cls=False, task='val', verbose=False, weights=['yolov5x.pt'])\n", - "YOLOv5 v4.0-21-gb26a2f6 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130.5MB)\n", + "YOLOv5 v4.0-75-gbdd88e1 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)\n", "\n", "Downloading https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5x.pt to yolov5x.pt...\n", - "100% 168M/168M [00:05<00:00, 31.9MB/s]\n", + "100% 168M/168M [00:04<00:00, 39.7MB/s]\n", "\n", "Fusing layers... \n", "Model Summary: 476 layers, 87730285 parameters, 0 gradients, 218.8 GFLOPS\n", - "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/labels/val2017' for images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2791.81it/s]\n", - "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: ../coco/labels/val2017.cache\n", - "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/labels/val2017.cache' for images and labels... 
4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:00<00:00, 13332180.55it/s]\n", - " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:30<00:00, 1.73it/s]\n", - " all 5e+03 3.63e+04 0.419 0.765 0.68 0.486\n", - "Speed: 5.2/2.0/7.2 ms inference/NMS/total per 640x640 image at batch-size 32\n", + "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/val2017' for images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2824.78it/s]\n", + "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: ../coco/val2017.cache\n", + " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:33<00:00, 1.68it/s]\n", + " all 5e+03 3.63e+04 0.749 0.619 0.68 0.486\n", + "Speed: 5.2/2.0/7.3 ms inference/NMS/total per 640x640 image at batch-size 32\n", "\n", "Evaluating pycocotools mAP... saving runs/test/exp/yolov5x_predictions.json...\n", "loading annotations into memory...\n", - "Done (t=0.41s)\n", + "Done (t=0.44s)\n", "creating index...\n", "index created!\n", "Loading and preparing results...\n", - "DONE (t=5.26s)\n", + "DONE (t=4.47s)\n", "creating index...\n", "index created!\n", "Running per image evaluation...\n", "Evaluate annotation type *bbox*\n", - "DONE (t=93.97s).\n", + "DONE (t=94.87s).\n", "Accumulating evaluation results...\n", - "DONE (t=15.06s).\n", + "DONE (t=15.96s).\n", " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.501\n", " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.687\n", " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.544\n", @@ -837,17 +836,17 @@ "base_uri": "https://localhost:8080/", "height": 65, "referenced_widgets": [ - "8501ed1563e4452eac9df6b7a66e8f8c", - "d2bb96801e1f46f4a58e02534f7026ff", - "468a796ef06b4a24bcba6fbd4a0a8db5", - "42ad5c1ea7be4835bffebf90642178f1", - "c58b5536d98f4814831934e9c30c4d78", - "505597101151486ea29e9ab754544d27", - "de6e7b4b4a1c408c9f89d89b07a13bcd", - "f5cc9c7d4c274b2d81327ba3163c43fd" + "e6459e0bcee449b090fc9807672725bc", + "c341e1d3bf3b40d1821ce392eb966c68", + "660afee173694231a6dce3cd94df6cae", + "261218485cef48df961519dde5edfcbe", + "32736d503c06497abfae8c0421918255", + "e257738711f54d5280c8393d9d3dce1c", + "beb7a6fe34b840899bb79c062681696f", + "e639132395d64d70b99d8b72c32f8fbb" ] }, - "outputId": "c68a3db4-1314-46b4-9e52-83532eb65749" + "outputId": "e8b7d5b3-a71e-4446-eec2-ad13419cf700" }, "source": [ "# Download COCO128\n", @@ -860,7 +859,7 @@ "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8501ed1563e4452eac9df6b7a66e8f8c", + "model_id": "e6459e0bcee449b090fc9807672725bc", "version_minor": 0, "version_major": 2 }, @@ -925,7 +924,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "6af7116a-01ab-4b94-e5d7-b37c17dc95de" + "outputId": "38e51b29-2df4-4f00-cde8-5f6e4a34da9e" }, "source": [ "# Train YOLOv5s on COCO128 for 3 epochs\n", @@ -937,15 +936,15 @@ "output_type": "stream", "text": [ "\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n", - "YOLOv5 v4.0-21-gb26a2f6 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130.5MB)\n", + "YOLOv5 v4.0-75-gbdd88e1 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)\n", "\n", - "Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='', data='./data/coco128.yaml', device='', epochs=3, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[640, 640], local_rank=-1, log_artifacts=False, 
log_imgs=16, multi_scale=False, name='exp', noautoanchor=False, nosave=True, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/exp', single_cls=False, sync_bn=False, total_batch_size=16, weights='yolov5s.pt', workers=8, world_size=1)\n", + "Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='', data='./data/coco128.yaml', device='', epochs=3, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[640, 640], linear_lr=False, local_rank=-1, log_artifacts=False, log_imgs=16, multi_scale=False, name='exp', noautoanchor=False, nosave=True, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/exp', single_cls=False, sync_bn=False, total_batch_size=16, weights='yolov5s.pt', workers=8, world_size=1)\n", "\u001b[34m\u001b[1mwandb: \u001b[0mInstall Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)\n", "Start Tensorboard with \"tensorboard --logdir runs/train\", view at http://localhost:6006/\n", - "2021-01-17 19:56:03.945851: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n", + "2021-02-12 06:38:28.027271: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n", "\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0\n", "Downloading https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt to yolov5s.pt...\n", - "100% 14.1M/14.1M [00:00<00:00, 15.8MB/s]\n", + "100% 14.1M/14.1M [00:01<00:00, 13.2MB/s]\n", "\n", "\n", " from n params module arguments \n", @@ -979,12 +978,11 @@ "Transferred 362/362 items from yolov5s.pt\n", "Scaled weight_decay = 0.0005\n", "Optimizer groups: 62 .bias, 62 conv.weight, 59 other\n", - "\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 2647.74it/s]\n", + "\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 2566.00it/s]\n", "\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: ../coco128/labels/train2017.cache\n", - "\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 1503840.09it/s]\n", - "\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 176.03it/s]\n", - "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 24200.82it/s]\n", - "\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:01<00:00, 123.25it/s]\n", + "\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 175.07it/s]\n", + "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 
128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 764773.38it/s]\n", + "\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 128.17it/s]\n", "Plotting labels... \n", "\n", "\u001b[34m\u001b[1mautoanchor: \u001b[0mAnalyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946\n", @@ -994,19 +992,19 @@ "Starting training for 3 epochs...\n", "\n", " Epoch gpu_mem box obj cls total targets img_size\n", - " 0/2 3.27G 0.04357 0.06779 0.01869 0.1301 207 640: 100% 8/8 [00:04<00:00, 1.95it/s]\n", - " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:05<00:00, 1.36it/s]\n", - " all 128 929 0.392 0.732 0.657 0.428\n", + " 0/2 3.27G 0.04357 0.06781 0.01869 0.1301 207 640: 100% 8/8 [00:03<00:00, 2.03it/s]\n", + " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:04<00:00, 1.14s/it]\n", + " all 128 929 0.646 0.627 0.659 0.431\n", "\n", " Epoch gpu_mem box obj cls total targets img_size\n", - " 1/2 7.47G 0.04308 0.06636 0.02083 0.1303 227 640: 100% 8/8 [00:02<00:00, 3.88it/s]\n", - " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:01<00:00, 5.07it/s]\n", - " all 128 929 0.387 0.737 0.657 0.432\n", + " 1/2 7.75G 0.04308 0.06654 0.02083 0.1304 227 640: 100% 8/8 [00:01<00:00, 4.11it/s]\n", + " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:01<00:00, 2.94it/s]\n", + " all 128 929 0.681 0.607 0.663 0.434\n", "\n", " Epoch gpu_mem box obj cls total targets img_size\n", - " 2/2 7.48G 0.04461 0.06864 0.01866 0.1319 191 640: 100% 8/8 [00:02<00:00, 3.57it/s]\n", - " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:02<00:00, 2.82it/s]\n", - " all 128 929 0.385 0.742 0.658 0.431\n", + " 2/2 7.75G 0.04461 0.06896 0.01866 0.1322 191 640: 100% 8/8 [00:02<00:00, 3.94it/s]\n", + " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:03<00:00, 1.22it/s]\n", + " all 128 929 0.642 0.632 0.662 0.432\n", "Optimizer stripped from runs/train/exp/weights/last.pt, 14.8MB\n", "3 epochs completed in 0.007 hours.\n", "\n" @@ -1224,6 +1222,19 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "code", + "metadata": { + "id": "RVRSOhEvUdb5" + }, + "source": [ + "# Evolve\n", + "!python train.py --img 640 --batch 64 --epochs 100 --data coco128.yaml --weights yolov5s.pt --cache --noautoanchor --evolve\n", + "!d=runs/train/evolve && cp evolve.* $d && zip -r evolve.zip $d && gsutil mv evolve.zip gs://bucket # upload results (optional)" + ], + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "metadata": { @@ -1238,4 +1249,4 @@ "outputs": [] } ] -} +} \ No newline at end of file diff --git a/utils/aws/__init__.py b/utils/aws/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/utils/aws/mime.sh b/utils/aws/mime.sh new file mode 100644 index 000000000000..c319a83cfbdf --- /dev/null +++ b/utils/aws/mime.sh @@ -0,0 +1,26 @@ +# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/ +# This script will run on every instance restart, not only on first start +# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA --- + +Content-Type: multipart/mixed; boundary="//" +MIME-Version: 1.0 + +--// +Content-Type: text/cloud-config; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Content-Disposition: attachment; filename="cloud-config.txt" + +#cloud-config +cloud_final_modules: +- [scripts-user, always] + +--// +Content-Type: text/x-shellscript; charset="us-ascii" +MIME-Version: 1.0 
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment; filename="userdata.txt"
+
+#!/bin/bash
+# --- paste contents of userdata.sh here ---
+--//
diff --git a/utils/aws/resume.py b/utils/aws/resume.py
new file mode 100644
index 000000000000..563f22be20dc
--- /dev/null
+++ b/utils/aws/resume.py
@@ -0,0 +1,37 @@
+# Resume all interrupted trainings in yolov5/ dir including DDP trainings
+# Usage: $ python utils/aws/resume.py
+
+import os
+import sys
+from pathlib import Path
+
+import torch
+import yaml
+
+sys.path.append('./')  # to run '$ python *.py' files in subdirectories
+
+port = 0  # --master_port
+path = Path('').resolve()
+for last in path.rglob('*/**/last.pt'):
+    ckpt = torch.load(last)
+    if ckpt['optimizer'] is None:
+        continue
+
+    # Load opt.yaml
+    with open(last.parent.parent / 'opt.yaml') as f:
+        opt = yaml.load(f, Loader=yaml.SafeLoader)
+
+    # Get device count
+    d = opt['device'].split(',')  # devices
+    nd = len(d)  # number of devices
+    ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1)  # distributed data parallel
+
+    if ddp:  # multi-GPU
+        port += 1
+        cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
+    else:  # single-GPU
+        cmd = f'python train.py --resume {last}'
+
+    cmd += ' > /dev/null 2>&1 &'  # redirect output to /dev/null and run in background
+    print(cmd)
+    os.system(cmd)
diff --git a/utils/aws/userdata.sh b/utils/aws/userdata.sh
new file mode 100644
index 000000000000..36405d1a1565
--- /dev/null
+++ b/utils/aws/userdata.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
+# This script will run only once on first instance start (for a re-start script see mime.sh)
+# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
+# Use >300 GB SSD
+
+cd home/ubuntu
+if [ ! -d yolov5 ]; then
+  echo "Running first-time script."  # install dependencies, download COCO, pull Docker
+  git clone https://github.com/ultralytics/yolov5 && sudo chmod -R 777 yolov5
+  cd yolov5
+  bash data/scripts/get_coco.sh && echo "Data done." &
+  sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
+  # python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
+else
+  echo "Running re-start script."  # resume interrupted runs
+  i=0
+  list=$(docker ps -qa)  # container list i.e.
$'one\ntwo\nthree\nfour' + while IFS= read -r id; do + ((i++)) + echo "restarting container $i: $id" + docker start $id + # docker exec -it $id python train.py --resume # single-GPU + docker exec -d $id python utils/aws/resume.py + done <<<"$list" +fi diff --git a/utils/datasets.py b/utils/datasets.py index 1e23934b63cc..d6ab16518034 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -20,12 +20,13 @@ from torch.utils.data import Dataset from tqdm import tqdm -from utils.general import xyxy2xywh, xywh2xyxy, xywhn2xyxy, clean_str +from utils.general import xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, resample_segments, \ + clean_str from utils.torch_utils import torch_distributed_zero_first # Parameters help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' -img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes +img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp'] # acceptable image suffixes vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes logger = logging.getLogger(__name__) @@ -120,8 +121,7 @@ def __iter__(self): class LoadImages: # for inference def __init__(self, path, img_size=640, stride=32): - p = str(Path(path)) # os-agnostic - p = os.path.abspath(p) # absolute path + p = str(Path(path).absolute()) # os-agnostic absolute path if '*' in p: files = sorted(glob.glob(p, recursive=True)) # glob elif os.path.isdir(p): @@ -300,7 +300,8 @@ def update(self, index, cap): # _, self.imgs[index] = cap.read() cap.grab() if n == 4: # read every 4th frame - _, self.imgs[index] = cap.retrieve() + success, im = cap.retrieve() + self.imgs[index] = im if success else self.imgs[index] * 0 n = 0 time.sleep(0.01) # wait time @@ -334,7 +335,7 @@ def __len__(self): def img2label_paths(img_paths): # Define label paths as a function of image paths sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings - return [x.replace(sa, sb, 1).replace('.' 
+ x.split('.')[-1], '.txt') for x in img_paths] + return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths] class LoadImagesAndLabels(Dataset): # for training/testing @@ -349,44 +350,49 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path - + try: f = [] # image files for p in path if isinstance(path, list) else [path]: p = Path(p) # os-agnostic if p.is_dir(): # dir f += glob.glob(str(p / '**' / '*.*'), recursive=True) + # f = list(p.rglob('**/*.*')) # pathlib elif p.is_file(): # file with open(p, 'r') as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: raise Exception(f'{prefix}{p} does not exist') self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib assert self.img_files, f'{prefix}No images found' except Exception as e: raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}') # Check cache self.label_files = img2label_paths(self.img_files) # labels - cache_path = Path(self.label_files[0]).parent.with_suffix('.cache') # cached labels + cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels if cache_path.is_file(): - cache = torch.load(cache_path) # load - if cache['hash'] != get_hash(self.label_files + self.img_files) or 'results' not in cache: # changed - cache = self.cache_labels(cache_path, prefix) # re-cache + cache, exists = torch.load(cache_path), True # load + if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed + cache, exists = self.cache_labels(cache_path, prefix), False # re-cache else: - cache = self.cache_labels(cache_path, prefix) # cache + cache, exists = self.cache_labels(cache_path, prefix), False # cache # Display cache - [nf, nm, ne, nc, n] = cache.pop('results') # found, missing, empty, corrupted, total - desc = f"Scanning '{cache_path}' for images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" - tqdm(None, desc=prefix + desc, total=n, initial=n) + nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total + if exists: + d = f"Scanning '{cache_path}' for images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" + tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. 
See {help_url}' # Read cache cache.pop('hash') # remove hash - labels, shapes = zip(*cache.values()) + cache.pop('version') # remove version + labels, shapes, self.segments = zip(*cache.values()) self.labels = list(labels) self.shapes = np.array(shapes, dtype=np.float64) self.img_files = list(cache.keys()) # update @@ -449,6 +455,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): im = Image.open(im_file) im.verify() # PIL verify shape = exif_size(im) # image size + segments = [] # instance segments assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' assert im.format.lower() in img_formats, f'invalid image format {im.format}' @@ -456,7 +463,12 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): if os.path.isfile(lb_file): nf += 1 # label found with open(lb_file, 'r') as f: - l = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels + l = [x.split() for x in f.read().strip().splitlines()] + if any([len(x) > 8 for x in l]): # is segment + classes = np.array([x[0] for x in l], dtype=np.float32) + segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) + l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) + l = np.array(l, dtype=np.float32) if len(l): assert l.shape[1] == 5, 'labels require 5 columns each' assert (l >= 0).all(), 'negative labels' @@ -468,7 +480,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): else: nm += 1 # label missing l = np.zeros((0, 5), dtype=np.float32) - x[im_file] = [l, shape] + x[im_file] = [l, shape, segments] except Exception as e: nc += 1 print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}') @@ -480,7 +492,8 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): print(f'{prefix}WARNING: No labels found in {path}. 
@@ -650,7 +663,7 @@ def hist_equalize(img, clahe=True, bgr=False):
 
 def load_mosaic(self, index):
     # loads images in a 4-mosaic
-    labels4 = []
+    labels4, segments4 = [], []
     s = self.img_size
     yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
     indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(3)]  # 3 additional image indices
@@ -678,19 +691,21 @@ def load_mosaic(self, index):
         padh = y1a - y1b
 
         # Labels
-        labels = self.labels[index].copy()
+        labels, segments = self.labels[index].copy(), self.segments[index].copy()
         if labels.size:
             labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
+            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
         labels4.append(labels)
+        segments4.extend(segments)
 
     # Concat/clip labels
-    if len(labels4):
-        labels4 = np.concatenate(labels4, 0)
-        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_perspective
-        # img4, labels4 = replicate(img4, labels4)  # replicate
+    labels4 = np.concatenate(labels4, 0)
+    for x in (labels4[:, 1:], *segments4):
+        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+    # img4, labels4 = replicate(img4, labels4)  # replicate
 
     # Augment
-    img4, labels4 = random_perspective(img4, labels4,
+    img4, labels4 = random_perspective(img4, labels4, segments4,
                                        degrees=self.hyp['degrees'],
                                        translate=self.hyp['translate'],
                                        scale=self.hyp['scale'],
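`xywhn2xyxy()` (whose `padw`/`padh` defaults change to 0 later in this diff) and the new `xyn2xy()` both map normalized tile coordinates onto the 2s x 2s mosaic canvas: scale by the tile size, then shift by the tile's top-left offset. Worked numbers for one box (illustration only):

```python
# Illustration only: a normalized xywh box placed on the mosaic canvas.
import numpy as np

w = h = 640                          # source tile size
padw, padh = 320, 0                  # tile's top-left offset inside the mosaic
b = np.array([0.5, 0.5, 0.2, 0.2])   # normalized (x, y, w, h)
x1 = w * (b[0] - b[2] / 2) + padw    # 576.0
y1 = h * (b[1] - b[3] / 2) + padh    # 256.0
x2 = w * (b[0] + b[2] / 2) + padw    # 704.0
y2 = h * (b[1] + b[3] / 2) + padh    # 384.0
print(x1, y1, x2, y2)
```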
@@ -704,7 +719,7 @@ def load_mosaic9(self, index):
     # loads images in a 9-mosaic
-    labels9 = []
+    labels9, segments9 = [], []
     s = self.img_size
     indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(8)]  # 8 additional image indices
     for i, index in enumerate(indices):
@@ -737,30 +752,34 @@ def load_mosaic9(self, index):
         x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords
 
         # Labels
-        labels = self.labels[index].copy()
+        labels, segments = self.labels[index].copy(), self.segments[index].copy()
         if labels.size:
             labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
+            segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
         labels9.append(labels)
+        segments9.extend(segments)
 
         # Image
         img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
         hp, wp = h, w  # height, width previous
 
     # Offset
-    yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border]  # mosaic center x, y
+    yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]  # mosaic center x, y
     img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
 
     # Concat/clip labels
-    if len(labels9):
-        labels9 = np.concatenate(labels9, 0)
-        labels9[:, [1, 3]] -= xc
-        labels9[:, [2, 4]] -= yc
+    labels9 = np.concatenate(labels9, 0)
+    labels9[:, [1, 3]] -= xc
+    labels9[:, [2, 4]] -= yc
+    c = np.array([xc, yc])  # centers
+    segments9 = [x - c for x in segments9]
 
-        np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:])  # use with random_perspective
-        # img9, labels9 = replicate(img9, labels9)  # replicate
+    for x in (labels9[:, 1:], *segments9):
+        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
+    # img9, labels9 = replicate(img9, labels9)  # replicate
 
     # Augment
-    img9, labels9 = random_perspective(img9, labels9,
+    img9, labels9 = random_perspective(img9, labels9, segments9,
                                        degrees=self.hyp['degrees'],
                                        translate=self.hyp['translate'],
                                        scale=self.hyp['scale'],
@@ -821,7 +840,8 @@ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scale
     return img, ratio, (dw, dh)
 
 
-def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
+def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
+                       border=(0, 0)):
     # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
     # targets = [cls, xyxy]
 
@@ -873,37 +893,38 @@ def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shea
     # Transform label coordinates
     n = len(targets)
     if n:
-        # warp points
-        xy = np.ones((n * 4, 3))
-        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
-        xy = xy @ M.T  # transform
-        if perspective:
-            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
-        else:  # affine
-            xy = xy[:, :2].reshape(n, 8)
-
-        # create new boxes
-        x = xy[:, [0, 2, 4, 6]]
-        y = xy[:, [1, 3, 5, 7]]
-        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-
-        # # apply angle-based reduction of bounding boxes
-        # radians = a * math.pi / 180
-        # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
-        # x = (xy[:, 2] + xy[:, 0]) / 2
-        # y = (xy[:, 3] + xy[:, 1]) / 2
-        # w = (xy[:, 2] - xy[:, 0]) * reduction
-        # h = (xy[:, 3] - xy[:, 1]) * reduction
-        # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
-
-        # clip boxes
-        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
-        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
+        use_segments = any(x.any() for x in segments)
+        new = np.zeros((n, 4))
+        if use_segments:  # warp segments
+            segments = resample_segments(segments)  # upsample
+            for i, segment in enumerate(segments):
+                xy = np.ones((len(segment), 3))
+                xy[:, :2] = segment
+                xy = xy @ M.T  # transform
+                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine
+
+                # clip
+                new[i] = segment2box(xy, width, height)
+
+        else:  # warp boxes
+            xy = np.ones((n * 4, 3))
+            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+            xy = xy @ M.T  # transform
+            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
+
+            # create new boxes
+            x = xy[:, [0, 2, 4, 6]]
+            y = xy[:, [1, 3, 5, 7]]
+            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+            # clip
+            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
+            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
 
         # filter candidates
-        i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
+        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
         targets = targets[i]
-        targets[:, 1:5] = xy[i]
+        targets[:, 1:5] = new[i]
 
     return img, targets
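Warping the dense segment points and refitting the box, instead of warping the four box corners, yields tighter labels under rotation; that is also why `box_candidates()` can afford the looser `area_thr=0.01` for segments. A small demonstration of the difference (illustration only):

```python
# Illustration only: refit width after a 45-degree rotation, computed from a
# dense segment vs. from the 4 corners of its bounding box.
import numpy as np

theta = np.pi / 4
M = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])

t = np.linspace(0, 2 * np.pi, 100)
segment = np.stack([np.cos(t), np.sin(t)], 1)  # unit circle, (100,2) points
corners = np.array([[-1., -1.], [1., -1.], [1., 1.], [-1., 1.]])  # its bbox corners

print(np.ptp((segment @ M.T)[:, 0]))  # ~2.0, box refit from the segment
print(np.ptp((corners @ M.T)[:, 0]))  # ~2.83, box refit from corners only
```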
diff --git a/utils/general.py b/utils/general.py
index bbc0f32b8425..e5bbc50c6177 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -47,11 +47,16 @@ def get_latest_run(search_dir='.'):
     return max(last_list, key=os.path.getctime) if last_list else ''
 
 
+def isdocker():
+    # Is environment a Docker container
+    return Path('/workspace').exists()  # or Path('/.dockerenv').exists()
+
+
 def check_online():
     # Check internet connectivity
     import socket
     try:
-        socket.create_connection(("1.1.1.1", 53))  # check host accesability
+        socket.create_connection(("1.1.1.1", 443), 5)  # check host accessibility
         return True
     except OSError:
         return False
@@ -62,7 +67,7 @@ def check_git_status():
     print(colorstr('github: '), end='')
     try:
         assert Path('.git').exists(), 'skipping check (not a git repository)'
-        assert not Path('/workspace').exists(), 'skipping check (Docker image)'  # not Path('/.dockerenv').exists()
+        assert not isdocker(), 'skipping check (Docker image)'
         assert check_online(), 'skipping check (offline)'
 
         cmd = 'git fetch && git config --get remote.origin.url'
@@ -95,6 +100,20 @@ def check_img_size(img_size, s=32):
     return new_size
 
 
+def check_imshow():
+    # Check if environment supports image displays
+    try:
+        assert not isdocker(), 'cv2.imshow() is disabled in Docker environments'
+        cv2.imshow('test', np.zeros((1, 1, 3)))
+        cv2.waitKey(1)
+        cv2.destroyAllWindows()
+        cv2.waitKey(1)
+        return True
+    except Exception as e:
+        print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
+        return False
+
+
 def check_file(file):
     # Search for file if not found
     if os.path.isfile(file) or file == '':
@@ -225,7 +244,7 @@ def xywh2xyxy(x):
     return y
 
 
-def xywhn2xyxy(x, w=640, h=640, padw=32, padh=32):
+def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
     # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top left x
@@ -235,6 +254,40 @@ def xywhn2xyxy(x, w=640, h=640, padw=32, padh=32):
     return y
 
 
+def xyn2xy(x, w=640, h=640, padw=0, padh=0):
+    # Convert normalized segments into pixel segments, shape (n,2)
+    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+    y[:, 0] = w * x[:, 0] + padw  # top left x
+    y[:, 1] = h * x[:, 1] + padh  # top left y
+    return y
+
+
+def segment2box(segment, width=640, height=640):
+    # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
+    x, y = segment.T  # segment xy
+    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
+    x, y, = x[inside], y[inside]
+    return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4))  # cls, xyxy
+
+
+def segments2boxes(segments):
+    # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
+    boxes = []
+    for s in segments:
+        x, y = s.T  # segment xy
+        boxes.append([x.min(), y.min(), x.max(), y.max()])  # cls, xyxy
+    return xyxy2xywh(np.array(boxes))  # cls, xywh
+
+
+def resample_segments(segments, n=1000):
+    # Up-sample an (n,2) segment
+    for i, s in enumerate(segments):
+        x = np.linspace(0, len(s) - 1, n)
+        xp = np.arange(len(s))
+        segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T  # segment xy
+    return segments
+
+
 def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
     # Rescale coords (xyxy) from img1_shape to img0_shape
     if ratio_pad is None:  # calculate from img0_shape
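A toy run of the helpers added above, assuming the repo root is on `PYTHONPATH` so they import from `utils.general`: a 3-point triangle is up-sampled to a dense segment by `resample_segments()`, then reduced to an in-image xyxy box by `segment2box()`.

```python
# Illustration only: exercise resample_segments() and segment2box().
import numpy as np
from utils.general import resample_segments, segment2box  # as added in this diff

tri = [np.array([[10., 10.], [100., 10.], [55., 80.]])]  # one (3,2) segment
dense = resample_segments(tri, n=100)[0]                 # (100,2) interpolated points
print(segment2box(dense, width=640, height=640))         # [ 10.  10. 100.  80.]
```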
@@ -337,11 +390,12 @@ def wh_iou(wh1, wh2):
     return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)
 
 
-def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
-    """Performs Non-Maximum Suppression (NMS) on inference results
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
+                        labels=()):
+    """Runs Non-Maximum Suppression (NMS) on inference results
 
     Returns:
-         detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
+         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
     """
 
     nc = prediction.shape[2] - 5  # number of classes
@@ -353,7 +407,7 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
     max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
     time_limit = 10.0  # seconds to quit after
     redundant = True  # require redundant detections
-    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
     merge = False  # use merge-NMS
 
     t = time.time()
@@ -430,8 +484,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
 def strip_optimizer(f='weights/best.pt', s=''):  # from utils.general import *; strip_optimizer()
     # Strip optimizer from 'f' to finalize training, optionally save as 's'
     x = torch.load(f, map_location=torch.device('cpu'))
-    for key in 'optimizer', 'training_results', 'wandb_id':
-        x[key] = None
+    for k in 'optimizer', 'training_results', 'wandb_id', 'ema':  # keys
+        x[k] = None
     x['epoch'] = -1
     x['model'].half()  # to FP16
     for p in x['model'].parameters():
diff --git a/utils/loss.py b/utils/loss.py
index 889ddf7295da..2302d18de87d 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -105,9 +105,8 @@ def __init__(self, model, autobalance=False):
             BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
 
         det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
-        self.balance = {3: [3.67, 1.0, 0.43], 4: [3.78, 1.0, 0.39, 0.22], 5: [3.88, 1.0, 0.37, 0.17, 0.10]}[det.nl]
-        # self.balance = [1.0] * det.nl
-        self.ssi = (det.stride == 16).nonzero(as_tuple=False).item()  # stride 16 index
+        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
+        self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
         self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance
         for k in 'na', 'nc', 'nl', 'anchors':
             setattr(self, k, getattr(det, k))
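The loss-balance line above replaces a hard-keyed dict lookup with `dict.get()` and a P3-P7 fallback, so a model with other than 3 detection layers no longer raises `KeyError`. The lookup in isolation (illustration only):

```python
# Illustration only: the dict.get() fallback pattern used for self.balance.
default = [4.0, 1.0, 0.25, 0.06, .02]  # P3-P7 objectness gains
balance = {3: [4.0, 1.0, 0.4]}         # P3-P5 objectness gains
print(balance.get(3, default))         # [4.0, 1.0, 0.4]
print(balance.get(5, default))         # falls back to the 5-layer default
```

Note that a 4-layer model also receives the 5-element default here.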
diff --git a/utils/plots.py b/utils/plots.py
index 3ec793528fe5..aa9a1cab81f0 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -15,7 +15,7 @@
 import seaborn as sns
 import torch
 import yaml
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageFont
 from scipy.signal import butter, filtfilt
 
 from utils.general import xywh2xyxy, xyxy2xywh
@@ -54,7 +54,7 @@ def butter_lowpass(cutoff, fs, order):
     return filtfilt(b, a, data)  # forward-backward filter
 
 
-def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+def plot_one_box(x, img, color=None, label=None, line_thickness=3):
     # Plots one bounding box on image img
     tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
     color = color or [random.randint(0, 255) for _ in range(3)]
@@ -68,6 +68,20 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
         cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
 
 
+def plot_one_box_PIL(box, img, color=None, label=None, line_thickness=None):
+    img = Image.fromarray(img)
+    draw = ImageDraw.Draw(img)
+    line_thickness = line_thickness or max(int(min(img.size) / 200), 2)
+    draw.rectangle(box, width=line_thickness, outline=tuple(color))  # plot
+    if label:
+        fontsize = max(round(max(img.size) / 40), 12)
+        font = ImageFont.truetype("Arial.ttf", fontsize)
+        txt_width, txt_height = font.getsize(label)
+        draw.rectangle([box[0], box[1] - txt_height + 4, box[0] + txt_width, box[1]], fill=tuple(color))
+        draw.text((box[0], box[1] - txt_height + 1), label, fill=(255, 255, 255), font=font)
+    return np.asarray(img)
+
+
 def plot_wh_methods():  # from utils.plots import *; plot_wh_methods()
     # Compares the two methods for width-height anchor multiplication
     # https://github.com/ultralytics/yolov3/issues/168
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 2cb09e71ce71..1b1cc2038c55 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -205,7 +205,7 @@ def model_info(model, verbose=False, img_size=640):
     try:  # FLOPS
         from thop import profile
-        stride = int(model.stride.max()) if hasattr(model, 'stride') else 32
+        stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
         img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device)  # input
         flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2  # stride GFLOPS
         img_size = img_size if isinstance(img_size, list) else [img_size, img_size]  # expand if int/float
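The `model_info()` change clamps the profiling input to at least 32x32, so the thop FLOPS pass cannot run on a degenerate image when `model.stride.max()` is small. A standalone sketch, assuming the `thop` package is installed; the stand-in module and the final scaling to 640 are illustrative, not taken from the patch:

```python
# Sketch only: FLOPS profiling at a clamped stride-sized input.
import torch
from copy import deepcopy
from thop import profile  # assumes thop is installed

model = torch.nn.Conv2d(3, 16, 3, padding=1)  # stand-in for a YOLOv5 model
stride = max(8, 32)                           # clamped to >= 32 as in the diff
img = torch.zeros((1, 3, stride, stride))     # minimal profiling input
flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2  # GFLOPS at stride
print(f'{flops * (640 / stride) ** 2:.1f} GFLOPS at 640x640')  # scale to report size
```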