From 7b415a9fe9c0fc46efd8a2d034c2bdd0d9e0eef6 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Mon, 11 Jul 2022 12:38:04 +0530
Subject: [PATCH 001/247] initial instance segmentation support

---
 evaluator.py              |  827 +++++++++++++++++++
 models/yolo.py            |   85 +-
 models/yolov5l_seg.yaml   |   48 ++
 models/yolov5m_seg.yaml   |   48 ++
 models/yolov5n_seg.yaml   |   48 ++
 models/yolov5s_seg.yaml   |   48 ++
 models/yolov5x_seg.yaml   |   48 ++
 seg_augmentations.py      |  368 +++++++++
 seg_dataloaders.py        | 1640 +++++++++++++++++++++++++++++++++++++
 train_instseg.py          |  680 +++++++++++++++
 utils/general.py          |   16 +-
 utils/loggers/__init__.py |  247 +++++-
 utils/metrics.py          |  380 ++++++---
 utils/plots.py            |  899 ++++++++++++++++++++
 utils/seg_loss.py         |  459 +++++++++++
 utils/segment.py          |  318 +++++++
 16 files changed, 6015 insertions(+), 144 deletions(-)
 create mode 100644 evaluator.py
 create mode 100644 models/yolov5l_seg.yaml
 create mode 100644 models/yolov5m_seg.yaml
 create mode 100644 models/yolov5n_seg.yaml
 create mode 100644 models/yolov5s_seg.yaml
 create mode 100644 models/yolov5x_seg.yaml
 create mode 100644 seg_augmentations.py
 create mode 100644 seg_dataloaders.py
 create mode 100644 train_instseg.py
 create mode 100644 utils/seg_loss.py
 create mode 100644 utils/segment.py

diff --git a/evaluator.py b/evaluator.py
new file mode 100644
index 000000000000..e15d090ad625
--- /dev/null
+++ b/evaluator.py
@@ -0,0 +1,827 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Validate a trained YOLOv5 model accuracy on a custom dataset
+
+Usage:
+    $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640
+"""
+
+import json
+from pathlib import Path
+from threading import Thread
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import pycocotools.mask as mask_util  # required by save_one_json for RLE mask encoding
+from tqdm import tqdm
+
+from models.experimental import attempt_load
+from seg_dataloaders import create_dataloader
+from utils.general import (
+    coco80_to_coco91_class,
+    increment_path,
+    colorstr,
+)
+from utils.general import (
+    check_dataset,
+    check_img_size,
+    check_suffix,
+)
+from utils.general import (
+    box_iou,
+    non_max_suppression,
+    scale_coords,
+    xyxy2xywh,
+    xywh2xyxy,
+)
+from utils.segment import (
+    non_max_suppression_masks,
+    mask_iou,
+    process_mask,
+    process_mask_upsample,
+    scale_masks,
+)
+from utils.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix
+from utils.plots import output_to_target, plot_images_boxes_and_masks
+from utils.torch_utils import select_device, time_sync
+from PIL import Image
+
+def save_one_txt(predn, save_conf, shape, file):
+    # Save one txt result
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    for *xyxy, conf, cls in predn.tolist():
+        xywh = (
+            (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
+        )  # normalized xywh
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+        with open(file, "a") as f:
+            f.write(("%g " * len(line)).rstrip() % line + "\n")
+
+
+def save_one_json(predn, jdict, path, class_map, pred_masks=None):
+    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+    box = xyxy2xywh(predn[:, :4])  # xywh
+    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+
+    if pred_masks is not None:
+        pred_masks = np.transpose(pred_masks, (2, 0, 1))
+        rles = [
+            mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0]
+            for mask in pred_masks
+        ]
+        for rle in rles:
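+            # pycocotools returns RLE "counts" as bytes; decode to str so json.dump can serialize it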
+            rle["counts"] = rle["counts"].decode("utf-8")
+
+    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
+        pred_dict = {
+            "image_id": image_id,
+            "category_id": class_map[int(p[5])],
+            "bbox": [round(x, 3) for x in b],
+            "score": round(p[4], 5),
+        }
+        if pred_masks is not None:
+            pred_dict["segmentation"] = rles[i]
+        jdict.append(pred_dict)
+
+
+@torch.no_grad()
+class Yolov5Evaluator:
+    def __init__(
+        self,
+        data,
+        conf_thres=0.001,
+        iou_thres=0.6,
+        device="",
+        single_cls=False,
+        augment=False,
+        verbose=False,
+        project="runs/val",
+        name="exp",
+        exist_ok=False,
+        half=True,
+        save_dir=Path(""),
+        nosave=False,
+        plots=True,
+        mask=False,
+        mask_downsample_ratio=1,
+    ) -> None:
+        self.data = check_dataset(data)  # check
+        self.conf_thres = conf_thres  # confidence threshold
+        self.iou_thres = iou_thres  # NMS IoU threshold
+        self.device = device  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        self.single_cls = single_cls  # treat as single-class dataset
+        self.augment = augment  # augmented inference
+        self.verbose = verbose  # verbose output
+        self.project = project  # save to project/name
+        self.name = name  # save to project/name
+        self.exist_ok = exist_ok  # existing project/name ok, do not increment
+        self.half = half  # use FP16 half-precision inference
+        self.save_dir = save_dir
+        self.nosave = nosave
+        self.plots = plots
+        self.mask = mask
+        self.mask_downsample_ratio = mask_downsample_ratio
+
+        self.nc = 1 if self.single_cls else int(self.data["nc"])  # number of classes
+        self.iouv = torch.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
+        self.niou = self.iouv.numel()
+        self.confusion_matrix = ConfusionMatrix(nc=self.nc)
+        self.dt = [0.0, 0.0, 0.0]
+        self.names = {k: v for k, v in enumerate(self.data["names"])}
+        self.s = (
+            ("%20s" + "%11s" * 10)
+            % (
+                "Class",
+                "Images",
+                "Labels",
+                "Box:{P",
+                "R",
+                "mAP@.5",
+                "mAP@.5:.95}",
+                "Mask:{P",
+                "R",
+                "mAP@.5",
+                "mAP@.5:.95}",
+            )
+            if self.mask
+            else ("%20s" + "%11s" * 6)
+            % (
+                "Class",
+                "Images",
+                "Labels",
+                "P",
+                "R",
+                "mAP@.5",
+                "mAP@.5:.95",
+            )
+        )
+
+        # coco stuff
+        self.is_coco = isinstance(self.data.get("val"), str) and self.data[
+            "val"
+        ].endswith(
+            "coco/val2017.txt"
+        )  # COCO dataset
+        self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000))
+        self.jdict = []
+        self.iou_thres = 0.65 if self.is_coco else self.iou_thres
+
+        # masks stuff
+        self.pred_masks = []  # for mask visualization
+
+        # metric stuff
+        self.seen = 0
+        self.stats = []
+        self.total_loss = torch.zeros((4 if self.mask else 3))
+        self.metric = Metrics() if self.mask else Metric()
+
+    def run_training(self, model, dataloader, compute_loss=None):
+        """This is for evaluation when training."""
+        self.seen = 0
+        self.device = next(model.parameters()).device  # get model device
+        # self.iouv.to(self.device)
+        self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device)
+        self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if self.half else model.float()
+        # Configure
+        model.eval()
+
+        # inference
+        # masks will be `None` when training plain object detection.
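+        # each batch: img (B,3,H,W) uint8; targets (n,6) rows of (batch_idx, cls, normalized xywh);
+        # masks are per-object binary masks, possibly downsampled by mask_downsample_ratio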
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
+            # reset pred_masks
+            self.pred_masks = []
+            img = img.to(self.device, non_blocking=True)
+            targets = targets.to(self.device)
+            if masks is not None:
+                masks = masks.to(self.device)
+            out, train_out = self.inference(model, img, targets, masks, compute_loss)
+
+            # Statistics per image
+            for si, pred in enumerate(out):
+                self.seen += 1
+
+                # eval in every image level
+                labels = targets[targets[:, 0] == si, 1:]
+                gt_masksi = masks[targets[:, 0] == si] if masks is not None else None
+
+                # get prediction masks
+                proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )
+
+                # for visualization
+                if self.plots and batch_i < 3 and pred_maski is not None:
+                    self.pred_masks.append(pred_maski.cpu())
+
+                # NOTE: eval in training image-size space
+                self.compute_stat(pred, pred_maski, labels, gt_masksi)
+
+            if batch_i < 3:
+                self.plot_images(batch_i, img, targets, masks, out, paths)
+
+        # compute mAP and print it.
+        t = self.after_infer()
+
+        # Return results
+        model.float()  # for training
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def run(
+        self,
+        weights,
+        batch_size,
+        imgsz,
+        save_txt=False,
+        save_conf=False,
+        save_json=False,
+        task="val",
+    ):
+        """This is for native evaluation."""
+        model, dataloader, imgsz = self.before_infer(
+            weights, batch_size, imgsz, save_txt, task
+        )
+        self.seen = 0
+        # self.iouv.to(self.device)
+        self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if self.half else model.float()
+        # Configure
+        model.eval()
+
+        # inference
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
+            # reset pred_masks
+            self.pred_masks = []
+            img = img.to(self.device, non_blocking=True)
+            targets = targets.to(self.device)
+            if masks is not None:
+                masks = masks.to(self.device)
+            out, train_out = self.inference(model, img, targets, masks)
+
+            # Statistics per image
+            for si, pred in enumerate(out):
+                self.seen += 1
+                path = Path(paths[si])
+                shape = shapes[si][0]
+                ratio_pad = shapes[si][1]
+
+                # eval in every image level
+                labels = targets[targets[:, 0] == si, 1:]
+                gt_masksi = masks[targets[:, 0] == si] if masks is not None else None
+
+                # get prediction masks
+                proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )
+
+                # for visualization
+                if self.plots and batch_i < 3 and pred_maski is not None:
+                    self.pred_masks.append(pred_maski.cpu())
+
+                # NOTE: eval in training image-size space
+                self.compute_stat(pred, pred_maski, labels, gt_masksi)
+
+                # no predictions, nothing to save
+                if len(pred) == 0:
+                    continue
+
+                if save_txt or save_json:
+                    # clone() so plot_images still works on the unscaled boxes
+                    predn = pred.clone()
+                    # val uses 0.5 padding, which differs from the dataloader padding, so ratio_pad must be passed in
+                    scale_coords(
+                        img[si].shape[1:], predn[:, :4], shape, ratio_pad
+                    )  # native-space pred
+
+                # Save/log
+                if save_txt and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving txt.
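+                    # predn is already in native (original-image) space here via scale_coords above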
+                    # only box predictions are saved
+                    save_one_txt(
+                        predn,
+                        save_conf,
+                        shape,
+                        file=self.save_dir / "labels" / (path.stem + ".txt"),
+                    )
+                if save_json and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving json.
+                    # if pred_maski is not None:
+                    # h, w, n
+                    pred_maski = scale_masks(
+                        img[si].shape[1:],
+                        pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
+                        shape,
+                        ratio_pad,
+                    )
+                    save_one_json(
+                        predn,
+                        self.jdict,
+                        path,
+                        self.class_map,
+                        pred_maski,
+                    )  # append to COCO-JSON dictionary
+
+            if self.plots and batch_i < 3:
+                self.plot_images(batch_i, img, targets, masks, out, paths)
+
+        # compute mAP and print it.
+        t = self.after_infer()
+
+        # save json
+        if self.save_dir.exists() and save_json:
+            pred_json = str(self.save_dir / "predictions.json")  # predictions json
+            print(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
+            with open(pred_json, "w") as f:
+                json.dump(self.jdict, f)
+
+        # Print speeds
+        shape = (batch_size, 3, imgsz, imgsz)
+        print(
+            f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}"
+            % t
+        )
+
+        s = (
+            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}"
+            if save_txt and self.save_dir.exists()
+            else ""
+        )
+        print(
+            f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}"
+        )
+
+        # Return results
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"):
+        """Prepare for evaluation without training."""
+        self.device = select_device(self.device, batch_size=batch_size)
+
+        # Directories
+        self.save_dir = increment_path(
+            Path(self.project) / self.name, exist_ok=self.exist_ok
+        )  # increment run
+        if not self.nosave:
+            (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(
+                parents=True, exist_ok=True
+            )  # make dir
+
+        # Load model
+        check_suffix(weights, ".pt")
+        model = attempt_load(weights, map_location=self.device)  # load FP32 model
+        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+        imgsz = check_img_size(imgsz, s=gs)  # check image size
+
+        # Data
+        if self.device.type != "cpu":
+            model(
+                torch.zeros(1, 3, imgsz, imgsz)
+                .to(self.device)
+                .type_as(next(model.parameters()))
+            )  # run once
+        pad = 0.0 if task == "speed" else 0.5
+        task = (
+            task if task in ("train", "val", "test") else "val"
+        )  # path to train/val/test images
+        dataloader = create_dataloader(
+            self.data[task],
+            imgsz,
+            batch_size,
+            gs,
+            self.single_cls,
+            pad=pad,
+            rect=True,
+            prefix=colorstr(f"{task}: "),
+            mask_head=self.mask,
+            mask_downsample_ratio=self.mask_downsample_ratio,
+        )[0]
+        return model, dataloader, imgsz
+
+    def inference(self, model, img, targets, masks=None, compute_loss=None):
+        """Inference"""
+        t1 = time_sync()
+        img = img.half() if self.half else img.float()  # uint8 to fp16/32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        _, _, height, width = img.shape  # batch size, channels, height, width
+        t2 = time_sync()
+        self.dt[0] += t2 - t1
+
+        # Run model
+        out, train_out = model(
+            img, augment=self.augment
+        )  # inference and training outputs
+        self.dt[1] += time_sync() - t2
+
+        # Compute loss
+        if compute_loss:
+            self.total_loss += compute_loss(train_out, targets, masks)[
+                1
+            ]  # box, obj, cls
+
+        # Run NMS
+        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(
+            self.device
+        )  # to pixels
+        t3 = time_sync()
+        out = self.nms(
+            prediction=out,
+            conf_thres=self.conf_thres,
+            iou_thres=self.iou_thres,
+            multi_label=True,
+            agnostic=self.single_cls,
+        )
+        self.dt[2] += time_sync() - t3
+        return out, train_out
+
+    def after_infer(self):
+        """Do something after inference, such as plotting and computing metrics.
+        Return:
+            t(tuple): speeds per image.
+        """
+        # Plot confusion matrix
+        if self.plots and self.save_dir.exists():
+            self.confusion_matrix.plot(
+                save_dir=self.save_dir, names=list(self.names.values())
+            )
+
+        # Compute statistics
+        stats = [np.concatenate(x, 0) for x in zip(*self.stats)]  # to numpy
+        box_or_mask_any = stats[0].any() or stats[1].any()
+        stats = stats[1:] if not self.mask else stats
+        if len(stats) and box_or_mask_any:
+            results = self.ap_per_class(
+                *stats,
+                self.plots,
+                self.save_dir if self.save_dir.exists() else None,
+                self.names,
+            )
+            self.metric.update(results)
+            nt = np.bincount(
+                stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc
+            )  # number of targets per class
+        else:
+            nt = torch.zeros(1)
+
+        # empty self.stats; keeping the accumulator on self avoids duplicated code between run() and run_training().
+        self.stats = []
+        # print information
+        self.print_metric(nt, stats)
+        t = tuple(x / self.seen * 1e3 for x in self.dt)  # speeds per image
+        return t
+
+    def process_batch(self, detections, labels, iouv):
+        """
+        Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+        Arguments:
+            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+            labels (Array[M, 5]), class, x1, y1, x2, y2
+        Returns:
+            correct (Array[N, 10]), for 10 IoU levels
+        """
+        correct = torch.zeros(
+            detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device
+        )
+        iou = box_iou(labels[:, 1:], detections[:, :4])
+        x = torch.where(
+            (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])
+        )  # IoU above threshold and classes match
+        if x[0].shape[0]:
+            matches = (
+                torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
+                .cpu()
+                .numpy()
+            )  # [label, detection, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            matches = torch.Tensor(matches).to(iouv.device)
+            correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+        return correct
+
+    def get_predmasks(self, pred, proto_out, gt_shape):
+        """Get pred masks in different ways.
+        1. process_mask, for val when training: eval with low-quality masks
+           (1/mask_ratio of the original size) to save CUDA memory.
+        2. process_mask_upsample, for val after training: get high-quality masks
+           (original size).
+
+        Args:
+            pred(torch.Tensor): output of network, (N, 5 + mask_dim + class).
+            proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w).
+            gt_shape(tuple): shape of gt mask; may not equal the input size of the
+                input image because of mask_downsample_ratio.
+        Return:
+            pred_mask(torch.Tensor): prediction of final masks with the same size as the
+                input image, (N, input_h, input_w).
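+        Note:
+            The upsample variant is selected when plotting, since drawing masks
+            requires input-resolution output; otherwise the cheaper low-resolution
+            path is used.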
+ """ + if proto_out is None or len(pred) == 0: + return None + process = process_mask_upsample if self.plots else process_mask + gt_shape = ( + gt_shape[0] * self.mask_downsample_ratio, + gt_shape[1] * self.mask_downsample_ratio, + ) + # n, h, w + pred_mask = ( + process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) + .permute(2, 0, 1) + .contiguous() + ) + return pred_mask + + def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): + assert not ( + (pred_maski is None) ^ (gt_masksi is None) + ), "`proto_out` and `gt_masksi` should be both None or both exist." + if pred_maski is None and gt_masksi is None: + return torch.zeros(0, self.niou, dtype=torch.bool) + + correct = torch.zeros( + predn.shape[0], + self.iouv.shape[0], + dtype=torch.bool, + device=self.iouv.device, + ) + + if not self.plots: + gt_masksi = F.interpolate( + gt_masksi.unsqueeze(0), + pred_maski.shape[1:], + mode="bilinear", + align_corners=False, + ).squeeze(0) + + iou = mask_iou( + gt_masksi.view(gt_masksi.shape[0], -1), + pred_maski.view(pred_maski.shape[0], -1), + ) + x = torch.where( + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) + ) # IoU above threshold and classes match + if x[0].shape[0]: + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) # [label, detection, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + matches = torch.Tensor(matches).to(self.iouv.device) + correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv + return correct + + def compute_stat(self, predn, pred_maski, labels, gt_maski): + """Compute states about ious. with boxs size in training img-size space.""" + nl = len(labels) + tcls = labels[:, 0].tolist() if nl else [] # target class + + if len(predn) == 0: + if nl: + self.stats.append( + ( + torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), + torch.Tensor(), + tcls, + ) + ) + return + + # Predictions + if self.single_cls: + predn[:, 5] = 0 + + # Evaluate + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + # boxes + correct_boxes = self.process_batch(predn, labelsn, self.iouv) + + # masks + correct_masks = self.process_batch_masks( + predn, pred_maski, gt_maski, labelsn + ) + + if self.plots: + self.confusion_matrix.process_batch(predn, labelsn) + else: + correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) + correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) + self.stats.append( + ( + correct_masks.cpu(), + correct_boxes.cpu(), + predn[:, 4].cpu(), + predn[:, 5].cpu(), + tcls, + ) + ) # (correct, conf, pcls, tcls) + + def print_metric(self, nt, stats): + # Print results + pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4) + print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results())) + + # Print results per class + # TODO: self.seen support verbose. 
+        if self.verbose and self.nc > 1 and len(stats):
+            for i, c in enumerate(self.metric.ap_class_index):
+                print(
+                    pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))
+                )
+
+    def plot_images(self, i, img, targets, masks, out, paths):
+        if not self.save_dir.exists():
+            return
+        # plot ground truth
+        f = self.save_dir / f"val_batch{i}_labels.jpg"  # labels
+
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])),
+            daemon=True,
+        ).start()
+        f = self.save_dir / f"val_batch{i}_pred.jpg"  # predictions
+
+        # plot predictions
+        if len(self.pred_masks):
+            pred_masks = (
+                torch.cat(self.pred_masks, dim=0)
+                if len(self.pred_masks) > 1
+                else self.pred_masks[0]
+            )
+        else:
+            pred_masks = None
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(
+                img,
+                output_to_target(out),
+                pred_masks,
+                paths,
+                f,
+                self.names,
+                max(img.shape[2:]),
+            ),
+            daemon=True,
+        ).start()
+        import wandb
+        if wandb.run:
+            res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:]))
+            res = Image.fromarray(res)
+            wandb.log({f"pred_{i}": wandb.Image(res)})
+
+    def nms(self, **kwargs):
+        return (
+            non_max_suppression_masks(**kwargs)
+            if self.mask
+            else non_max_suppression(**kwargs)
+        )
+
+    def ap_per_class(self, *args):
+        return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args)
+
+
+class Metric:
+    def __init__(self) -> None:
+        self.p = []  # (nc, )
+        self.r = []  # (nc, )
+        self.f1 = []  # (nc, )
+        self.all_ap = []  # (nc, 10)
+        self.ap_class_index = []  # (nc, )
+
+    @property
+    def ap50(self):
+        """AP@0.5 of all classes.
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap[:, 0] if len(self.all_ap) else []
+
+    @property
+    def ap(self):
+        """AP@0.5:0.95
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap.mean(1) if len(self.all_ap) else []
+
+    @property
+    def mp(self):
+        """Mean precision of all classes.
+        Return:
+            float.
+        """
+        return self.p.mean() if len(self.p) else 0.0
+
+    @property
+    def mr(self):
+        """Mean recall of all classes.
+        Return:
+            float.
+        """
+        return self.r.mean() if len(self.r) else 0.0
+
+    @property
+    def map50(self):
+        """Mean AP@0.5 of all classes.
+        Return:
+            float.
+        """
+        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
+
+    @property
+    def map(self):
+        """Mean AP@0.5:0.95 of all classes.
+        Return:
+            float.
+ """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index diff --git a/models/yolo.py b/models/yolo.py index 02660e6c4130..e6860a9d7435 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -88,6 +88,64 @@ def _make_grid(self, nx=20, ny=20, i=0): anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape) return grid, anchor_grid +class DetectSegment(Detect): + def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inplace=True): + super().__init__(nc, anchors, ch, inplace) + self.mask_dim = mask_dim + self.no = nc + 5 + self.mask_dim # number of outputs per anchor + self.nm = 5 + self.mask_dim + self.proto_c = proto_channel + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch) # output conv + + # p3作为输入 + self.proto_net = nn.Sequential( + nn.Conv2d(ch[0], self.proto_c, kernel_size=3, stride=1, padding=1), + nn.SiLU(inplace=True), + # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), + # nn.SiLU(inplace=True), + # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), + # nn.SiLU(inplace=True), + # nn.Upsample(scale_factor=2, mode='nearest'), + nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), + nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), + nn.SiLU(inplace=True), + nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), + nn.SiLU(inplace=True)) + + def forward(self, x): + z = [] # inference output + for i in range(self.nl): + if i == 0: + proto_out = self.proto_net(x[i]) + + x[i] = self.m[i](x[i]) # conv + bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + + if not self.training: # inference + if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: + self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) + + y = x[i].clone() + y[..., 0:5] = y[..., 0:5].sigmoid() + y[..., self.nm:] = y[..., self.nm:].sigmoid() + if self.inplace: + y[..., 0:2] = (y[..., 0:2] 
* 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 + xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + y = torch.cat((xy.type_as(y), wh.type_as(y), y[..., 4:]), -1) + z.append(y.view(-1, self.na * ny * nx, self.no)) + + # TODO: export + if torch.onnx.is_in_onnx_export(): + output = torch.cat(z, 1) + return output # keep the same type with x + else: + return (x, proto_out) if self.training else (torch.cat(z, 1), (x, proto_out)) + class Model(nn.Module): # YOLOv5 model @@ -115,7 +173,15 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, i # Build strides, anchors m = self.model[-1] # Detect() - if isinstance(m, Detect): + if isinstance(m, DetectSegment): + s = 256 # 2x min stride + m.inplace = self.inplace + m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]]) # forward + m.anchors /= m.stride.view(-1, 1, 1) + check_anchor_order(m) + self.stride = m.stride + self._initialize_biases() # only run once + elif isinstance(m, Detect): s = 256 # 2x min stride m.inplace = self.inplace m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward @@ -207,9 +273,9 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. m = self.model[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from - b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85) - b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls + b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self): @@ -234,6 +300,12 @@ def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers self.info() return self + def autoshape(self): # add AutoShape module + LOGGER.info('Adding AutoShape... 
') + m = AutoShape(self) # wrap model + copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes + return m + def info(self, verbose=False, img_size=640): # print model information model_info(self, verbose, img_size) @@ -279,10 +351,13 @@ def parse_model(d, ch): # model_dict, input_channels(3) args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) - elif m is Detect: + # TODO: channel, gw, gd + elif m in [Detect, DetectSegment]: args.append([ch[x] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) + if m is DetectSegment: + args[3] = make_divisible(args[3] * gw, 8) elif m is Contract: c2 = ch[f] * args[0] ** 2 elif m is Expand: diff --git a/models/yolov5l_seg.yaml b/models/yolov5l_seg.yaml new file mode 100644 index 000000000000..98fbe51addfe --- /dev/null +++ b/models/yolov5l_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5m_seg.yaml b/models/yolov5m_seg.yaml new file mode 100644 index 000000000000..6b19539786b2 --- /dev/null +++ b/models/yolov5m_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.67 # model depth multiple +width_multiple: 0.75 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, 
[256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5n_seg.yaml b/models/yolov5n_seg.yaml new file mode 100644 index 000000000000..40a0409aac46 --- /dev/null +++ b/models/yolov5n_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.25 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/models/yolov5s_seg.yaml b/models/yolov5s_seg.yaml new file mode 100644 index 000000000000..cb71f5853de6 --- /dev/null +++ b/models/yolov5s_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters 1767976 +nc: 3 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.5 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] \ No newline at end of file diff --git a/models/yolov5x_seg.yaml b/models/yolov5x_seg.yaml new 
file mode 100644 index 000000000000..e1f91c584dca --- /dev/null +++ b/models/yolov5x_seg.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.33 # model depth multiple +width_multiple: 1.25 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + ] diff --git a/seg_augmentations.py b/seg_augmentations.py new file mode 100644 index 000000000000..63055f640390 --- /dev/null +++ b/seg_augmentations.py @@ -0,0 +1,368 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Image augmentation functions +""" + +import logging +import math +import random + +import cv2 +import numpy as np + +from utils.general import colorstr, check_version +from utils.segment import segment2box, resample_segments +from utils.metrics import bbox_ioa + + +class Albumentations: + # YOLOv5 Albumentations class (optional, only used if package is installed) + def __init__(self): + self.transform = None + try: + import albumentations as A + + check_version(A.__version__, "1.0.3") # version requirement + + self.transform = A.Compose( + [ + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0), + ], + bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), + ) + + logging.info( + colorstr("albumentations: ") + + ", ".join(f"{x}" for x in self.transform.transforms if x.p) + ) + except ImportError: # package not installed, skip + pass + except Exception as e: + logging.info(colorstr("albumentations: ") + f"{e}") + + def __call__(self, im, labels, p=1.0): + if self.transform and random.random() < p: + new = self.transform( + image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0] + ) # transformed + im, labels = new["image"], np.array( + [[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])] + ) + return im, labels + + +def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): + # HSV color-space augmentation + if hgain or sgain or vgain: + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) + dtype = im.dtype # uint8 + + x = np.arange(0, 256, dtype=r.dtype) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = 
np.clip(x * r[2], 0, 255).astype(dtype) + + im_hsv = cv2.merge( + (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)) + ) + cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed + + +def hist_equalize(im, clahe=True, bgr=False): + # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 + yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) + if clahe: + c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + yuv[:, :, 0] = c.apply(yuv[:, :, 0]) + else: + yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram + return cv2.cvtColor( + yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB + ) # convert YUV image to RGB + + +def replicate(im, labels): + # Replicate labels + h, w = im.shape[:2] + boxes = labels[:, 1:].astype(int) + x1, y1, x2, y2 = boxes.T + s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) + for i in s.argsort()[: round(s.size * 0.5)]: # smallest indices + x1b, y1b, x2b, y2b = boxes[i] + bh, bw = y2b - y1b, x2b - x1b + yc, xc = int(random.uniform(0, h - bh)), int( + random.uniform(0, w - bw) + ) # offset x, y + x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] + im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + + return im, labels + + +def letterbox( + im, + new_shape=(640, 640), + color=(114, 114, 114), + auto=True, + scaleFill=False, + scaleup=True, + stride=32, + center=True, # center padding or left top padding +): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + if center: + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)) if center else 0, int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)) if center else 0, int(round(dw + 0.1)) + im = cv2.copyMakeBorder( + im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color + ) # add border + return im, ratio, (dw, dh) + + +def random_perspective( + im, + targets=(), + segments=(), + degrees=10, + translate=0.1, + scale=0.1, + shear=10, + perspective=0.0, + border=(0, 0), + area_thr=0.2, + return_seg=False, +): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = 
random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = ( + random.uniform(0.5 - translate, 0.5 + translate) * width + ) # x translation (pixels) + T[1, 2] = ( + random.uniform(0.5 - translate, 0.5 + translate) * height + ) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective( + im, M, dsize=(width, height), borderValue=(114, 114, 114) + ) + else: # affine + im = cv2.warpAffine( + im, M[:2], dsize=(width, height), borderValue=(114, 114, 114) + ) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + new_segments = [] + if n: + use_segments = any(x.any() for x in segments) + new = np.zeros((n, 4)) + if use_segments: # warp segments + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = ( + xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] + ) # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + new_segments.append(xy) + + else: # warp boxes + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape( + n * 4, 2 + ) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape( + n, 8 + ) # perspective rescale or affine + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + new = ( + np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + ) + + # clip + new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) + new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates( + box1=targets[:, 1:5].T * s, + box2=new.T, + cls=targets[:, 0], + # area_thr=0.01 if use_segments else 0.10, + area_thr=area_thr, + ) + targets = targets[i] + targets[:, 1:5] = new[i] + new_segments = ( + np.array(new_segments)[i] if len(new_segments) else np.array(new_segments) + ) + + return (im, targets, new_segments) if return_seg else (im, targets) + + +def copy_paste(im, labels, segments, p=0.5): + # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) + n = len(segments) + if p and n: + h, w, c = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + for j in random.sample(range(n), k=round(p * n)): + l, s = labels[j], segments[j] + box = w - l[3], l[2], w - l[1], l[4] + ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + if (ioa < 0.30).all(): # allow 30% obscuration of existing labels + labels = np.concatenate((labels, [[l[0], *box]]), 0) + 
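+                # the appended box and segment are mirrored in x (w - x) to match the cv2.flip(result, 1) below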
segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) + cv2.drawContours( + im_new, + [segments[j].astype(np.int32)], + -1, + (255, 255, 255), + cv2.FILLED, + ) + + result = cv2.bitwise_and(src1=im, src2=im_new) + result = cv2.flip(result, 1) # augment segments (flip left-right) + i = result > 0 # pixels to replace + # i[:, :] = result.max(2).reshape(h, w, 1) # act over ch + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug + + return im, labels, segments + + +def cutout(im, labels, p=0.5): + # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 + if random.random() < p: + h, w = im.shape[:2] + scales = ( + [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 + ) # image size fraction + for s in scales: + mask_h = random.randint(1, int(h * s)) # create random masks + mask_w = random.randint(1, int(w * s)) + + # box + xmin = max(0, random.randint(0, w) - mask_w // 2) + ymin = max(0, random.randint(0, h) - mask_h // 2) + xmax = min(w, xmin + mask_w) + ymax = min(h, ymin + mask_h) + + # apply random color mask + im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + + # return unobscured labels + if len(labels) and s > 0.03: + box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) + ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + labels = labels[ioa < 0.60] # remove >60% obscured labels + + return labels + + +def mixup(im, labels, im2, labels2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + return im, labels + + +def box_candidates( + box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16 +): # box1(4,n), box2(4,n) + # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + area_thr = ( + np.array(area_thr)[cls.astype(np.int)] + if isinstance(area_thr, list) + else area_thr + ) + if isinstance(area_thr, list) and len(area_thr) == 1: + area_thr = area_thr[0] + ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio + return ( + (w2 > wh_thr) + & (h2 > wh_thr) + & (w2 * h2 / (w1 * h1 + eps) > area_thr) + & (ar < ar_thr) + ) # candidates \ No newline at end of file diff --git a/seg_dataloaders.py b/seg_dataloaders.py new file mode 100644 index 000000000000..31fb0a1872ba --- /dev/null +++ b/seg_dataloaders.py @@ -0,0 +1,1640 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Dataloaders +""" + +import glob +import logging +import os +import time +import json +import yaml +import random +from itertools import repeat +from multiprocessing.pool import ThreadPool, Pool +from PIL import Image +from pathlib import Path +from functools import wraps +from zipfile import ZipFile + +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +from torch.utils.data import distributed +from torch.utils.data import Dataset as torchDataset +from torch.utils.data.sampler import RandomSampler +from tqdm import tqdm + + +from seg_augmentations import ( + Albumentations, + augment_hsv, + copy_paste, + letterbox, + mixup, + random_perspective, +) +from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy +from utils.torch_utils import torch_distributed_zero_first + + +from torch.utils.data.sampler import BatchSampler as 
torchBatchSampler +from torch.utils.data.sampler import Sampler + +class _RepeatSampler: + """ Sampler that repeats forever + + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) +class YoloBatchSampler(torchBatchSampler): + """ + This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. + It works just like the :class:`torch.utils.data.sampler.BatchSampler`, + but it will turn on/off the mosaic aug. + """ + + def __init__(self, *args, augment=True, **kwargs): + super().__init__(*args, **kwargs) + self.augment = augment + + def __iter__(self): + for batch in super().__iter__(): + yield [(self.augment, idx) for idx in batch] + +def create_dataloader_ori( + path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + prefix="", + shuffle=False, + neg_dir="", + bg_dir="", + area_thr=0.2, + mask_head=False, + mask_downsample_ratio=1, +): + if rect and shuffle: + print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") + shuffle = False + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels + with torch_distributed_zero_first(rank): + dataset = data_load( + path, + imgsz, + batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix, + neg_dir=neg_dir, + bg_dir=bg_dir, + area_thr=area_thr, + ) + if mask_head: + dataset.downsample_ratio = mask_downsample_ratio + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else None + loader = DataLoader if image_weights else InfiniteDataLoader + # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader() + dataloader = loader( + dataset, + batch_size=batch_size, + num_workers=nw, + shuffle=shuffle and sampler is None, + sampler=sampler, + pin_memory=True, + collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, + ) + return dataloader, dataset + + +def create_dataloader( + path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + prefix="", + shuffle=False, + neg_dir="", + bg_dir="", + area_thr=0.2, + mask_head=False, + mask_downsample_ratio=1, +): + if rect and shuffle: + print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") + shuffle = False + data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + with torch_distributed_zero_first(rank): + dataset = data_load( + path, + imgsz, + batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + 
image_weights=image_weights, + prefix=prefix, + neg_dir=neg_dir, + bg_dir=bg_dir, + area_thr=area_thr, + ) + if mask_head: + dataset.downsample_ratio = mask_downsample_ratio + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers + # sampler = InfiniteSampler(len(dataset), seed=0) + sampler = ( + distributed.DistributedSampler(dataset, shuffle=shuffle) + if rank != -1 + else RandomSampler(dataset) + ) + + batch_sampler = ( + YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + augment=augment, + ) + if not rect + else None + ) + dataloader = DataLoader( + dataset, + num_workers=nw, + batch_size=1 + if batch_sampler is not None + else batch_size, # batch-size and batch-sampler is exclusion + batch_sampler=batch_sampler, + pin_memory=True, + collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, + # Make sure each process has different random seed, especially for 'fork' method. + # Check https://github.com/pytorch/pytorch/issues/63311 for more details. + # but this will make init_seed() not work. + # worker_init_fn=worker_init_reset_seed, + ) + return dataloader, dataset + + +class Dataset(torchDataset): + """This class is a subclass of the base :class:`torch.utils.data.Dataset`, + that enables on the fly resizing of the ``input_dim``. + + Args: + input_dimension (tuple): (width,height) tuple with default dimensions of the network + """ + + def __init__(self, augment=True): + super().__init__() + self.augment = augment + + @staticmethod + def mosaic_getitem(getitem_fn): + """ + Decorator method that needs to be used around the ``__getitem__`` method. |br| + This decorator enables the closing mosaic + + Example: + >>> class CustomSet(ln.data.Dataset): + ... def __len__(self): + ... return 10 + ... @ln.data.Dataset.mosaic_getitem + ... def __getitem__(self, index): + ... 
return self.enable_mosaic + """ + + @wraps(getitem_fn) + def wrapper(self, index): + if not isinstance(index, int): + self.augment = index[0] + index = index[1] + + ret_val = getitem_fn(self, index) + + return ret_val + + return wrapper + + +class LoadImagesAndLabels(Dataset): + # YOLOv5 train_loader/val_loader, loads images and labels for training and validation + cache_version = 0.6 # dataset labels *.cache version + + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0.0, + prefix="", + neg_dir="", + bg_dir="", + area_thr=0.2, + ): + super().__init__(augment=augment) + self.img_size = img_size + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = ( + self.augment and not self.rect + ) # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + self.path = path + self.albumentations = Albumentations() if augment else None + + # additional feature + self.img_neg_files, self.img_bg_files = self.get_neg_and_bg(neg_dir, bg_dir) + self.area_thr = area_thr + + p = Path(path) # os-agnostic + self.img_files = self.get_img_files(p, prefix) + self.label_files = img2label_paths(self.img_files) # labels + # Check cache + cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache") + labels, shapes, segments, img_files, label_files = self.load_cache(cache_path, prefix) + + self.segments = segments + self.labels = list(labels) + self.shapes = np.array(shapes, dtype=np.float64) + self.img_files = img_files # update + self.label_files = label_files # update + + num_imgs = len(shapes) # number of images + batch_index = np.floor(np.arange(num_imgs) / batch_size).astype(np.int) # batch index + self.batch_index = batch_index # batch index of image + self.num_imgs = num_imgs + self.indices = range(num_imgs) + + # Update labels + for i, (_, segment) in enumerate(zip(self.labels, self.segments)): + if single_cls: # single-class training, merge all classes into 0 + self.labels[i][:, 0] = 0 + if segment: + self.segments[i][:, 0] = 0 + + # Rectangular Training + if self.rect: + num_batches = batch_index[-1] + 1 # number of batches + self.update_rect(num_batches, pad) + + # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) + self.imgs, self.img_npy = [None] * num_imgs, [None] * num_imgs + if cache_images: + self.cache_images(cache_images, prefix) + + def cache_images(self, cache_images, prefix): + """Cache images to disk or ram for faster speed.""" + if cache_images == "disk": + self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + "_npy") + self.img_npy = [ + self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files + ] + self.im_cache_dir.mkdir(parents=True, exist_ok=True) + gb = 0 # Gigabytes of cached images + self.img_hw0, self.img_hw = [None] * self.num_imgs, [None] * self.num_imgs + results = ThreadPool(NUM_THREADS).imap( + lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs)) + ) + pbar = tqdm(enumerate(results), total=self.num_imgs) + for i, x in pbar: + if cache_images == "disk": + if not self.img_npy[i].exists(): + np.save(self.img_npy[i].as_posix(), x[0]) + gb += self.img_npy[i].stat().st_size + else: + ( + self.imgs[i], + self.img_hw0[i], + self.img_hw[i], + ) = x # im, hw_orig, hw_resized = 
load_image(self, i) + gb += self.imgs[i].nbytes + pbar.desc = f"{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})" + pbar.close() + + def get_img_files(self, p, prefix): + """Read image files.""" + try: + f = [] # image files + if p.is_dir(): # dir + f += glob.glob(str(p / "**" / "*.*"), recursive=True) + # f = list(p.rglob('*.*')) # pathlib + elif p.is_file(): # file + with open(p, "r") as t: + t = t.read().strip().splitlines() + parent = str(p.parent) + os.sep + f += [ + x.replace("./", parent) if x.startswith("./") else x for x in t + ] # local to global path + # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) + else: + raise Exception(f"{prefix}{p} does not exist") + img_files = sorted( + [x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS] + ) + # img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib + assert img_files, f"{prefix}No images found" + except Exception as e: + raise Exception(f"{prefix}Error loading data from {str(p)}: {e}\nSee {HELP_URL}") + return img_files + + def get_neg_and_bg(self, neg_dir, bg_dir): + """Get negative images and background images.""" + img_neg_files, img_bg_files = [], [] + if os.path.isdir(neg_dir): + img_neg_files = [os.path.join(neg_dir, i) for i in os.listdir(neg_dir)] + logging.info( + colorstr("Negative dir: ") + + f"'{neg_dir}', using {len(img_neg_files)} images from the dir as negative samples during training" + ) + + if os.path.isdir(bg_dir): + img_bg_files = [os.path.join(bg_dir, i) for i in os.listdir(bg_dir)] + logging.info( + colorstr("Background dir: ") + + f"{bg_dir}, using {len(img_bg_files)} images from the dir as backgrounds during training" + ) + return img_neg_files, img_bg_files + + def load_cache(self, cache_path, prefix): + """Load labels from a *.cache file.""" + try: + cache, exists = ( + np.load(cache_path, allow_pickle=True).item(), + True, + ) # load dict + assert cache["version"] == self.cache_version # same version + assert cache["hash"] == get_hash(self.label_files + self.img_files) # same hash + except Exception: # any cache failure: re-cache labels + cache, exists = self.cache_labels(cache_path, prefix), False # cache + + # Display cache + nf, nm, ne, nc, n = cache.pop("results") # found, missing, empty, corrupted, total + if exists: + d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" + tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results + if cache["msgs"]: + logging.info("\n".join(cache["msgs"])) # display warnings + assert ( + nf > 0 or not self.augment + ), f"{prefix}No labels in {cache_path}. Cannot train without labels.
See {HELP_URL}" + + # Read cache + [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items + labels, shapes, segments = zip(*cache.values()) + img_files = list(cache.keys()) # update + label_files = img2label_paths(cache.keys()) # update + return labels, shapes, segments, img_files, label_files + + def update_rect(self, num_batches, pad): + """Update attr if rect is True.""" + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.img_files = [self.img_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * num_batches + for i in range(num_batches): + ari = ar[self.batch_index == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = ( + np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride + ) + + def cache_labels(self, path=Path("./labels.cache"), prefix=""): + """Cache labels to *.cache file if there is no *.cache file in local.""" + # Cache dataset labels, check images and read shapes + x = {} # dict + nm, nf, ne, nc, msgs = ( + 0, + 0, + 0, + 0, + [], + ) # number missing, found, empty, corrupt, messages + desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." + + with Pool(NUM_THREADS) as pool: + pbar = tqdm( + pool.imap( + verify_image_label, + zip(self.img_files, self.label_files, repeat(prefix)), + ), + desc=desc, + total=len(self.img_files), + ) + for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: + nm += nm_f + nf += nf_f + ne += ne_f + nc += nc_f + if im_file: + x[im_file] = [l, shape, segments] + if msg: + msgs.append(msg) + pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted" + + pbar.close() + if msgs: + logging.info("\n".join(msgs)) + if nf == 0: + logging.info(f"{prefix}WARNING: No labels found in {path}. 
See {HELP_URL}") + x["hash"] = get_hash(self.label_files + self.img_files) + x["results"] = nf, nm, ne, nc, len(self.img_files) + x["msgs"] = msgs # warnings + x["version"] = self.cache_version # cache version + try: + np.save(path, x) # save cache for next time + path.with_suffix(".cache.npy").rename(path) # remove .npy suffix + logging.info(f"{prefix}New cache created: {path}") + except Exception as e: + logging.info( + f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}" + ) # path not writeable + return x + + def __len__(self): + return len(self.img_files) + + # def __iter__(self): + # self.count = -1 + # print('ran dataset iter') + # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) + # return self + + @Dataset.mosaic_getitem + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + self.mosaic = self.augment and not self.rect + mosaic = self.mosaic and random.random() < hyp["mosaic"] + if mosaic: + # Load mosaic + img, labels = load_mosaic(self, index) + shapes = None + + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = ( + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size + ) # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] + ) + + if self.augment: + img, labels = random_perspective( + img, + labels, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"], + ) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn( + labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 + ) + + if self.augment: + # Albumentations + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + + # Cutouts + # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return torch.from_numpy(img), labels_out, self.img_files[index], shapes + + @staticmethod + def collate_fn(batch): + img, label, path, shapes = zip(*batch) # transposed + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, None + + @staticmethod + def collate_fn4(batch): + img, label, path, shapes = zip(*batch) # transposed + n = len(shapes) // 4 + img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] + + ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) + wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) + s = 
torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale + for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW + i *= 4 + if random.random() < 0.5: + im = F.interpolate( + img[i].unsqueeze(0).float(), + scale_factor=2.0, + mode="bilinear", + align_corners=False, + )[0].type(img[i].type()) + l = label[i] + else: + im = torch.cat( + ( + torch.cat((img[i], img[i + 1]), 1), + torch.cat((img[i + 2], img[i + 3]), 1), + ), + 2, + ) + l = ( + torch.cat( + ( + label[i], + label[i + 1] + ho, + label[i + 2] + wo, + label[i + 3] + ho + wo, + ), + 0, + ) + * s + ) + img4.append(im) + label4.append(l) + + for i, l in enumerate(label4): + l[:, 0] = i # add target image index for build_targets() + + return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4 + + +class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0, + prefix="", + neg_dir="", + bg_dir="", + area_thr=0.2, + downsample_ratio=1, # return downsampled masks + ): + super().__init__( + path, + img_size, + batch_size, + augment, + hyp, + rect, + image_weights, + cache_images, + single_cls, + stride, + pad, + prefix, + neg_dir, + bg_dir, + area_thr, + ) + self.downsample_ratio = downsample_ratio + + @Dataset.mosaic_getitem + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + self.mosaic = self.augment and not self.rect + mosaic = self.mosaic and random.random() < hyp["mosaic"] + masks = [] + if mosaic: + # Load mosaic + img, labels, segments = load_mosaic(self, index, return_seg=True) + shapes = None + + # TODO: MixUp does not support segments yet + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = ( + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size + ) # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy + segments = self.segments[index].copy() + # TODO + if len(segments): + for i_s in range(len(segments)): + segments[i_s] = xyn2xy( + segments[i_s], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1], + ) + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] + ) + + if self.augment: + img, labels, segments = random_perspective( + img, + labels, + segments=segments, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"], + return_seg=True, + ) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn( + labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 + ) + for si in range(len(segments)): + mask = polygon2mask_downsample( + img.shape[:2], + [segments[si].reshape(-1)], + downsample_ratio=self.downsample_ratio, + ) + masks.append(torch.from_numpy(mask.astype(np.float32))) + + masks = ( + torch.stack(masks, axis=0) + if len(masks) + else torch.zeros( + nl, img.shape[0] // self.downsample_ratio,
img.shape[1] // self.downsample_ratio + ) + ) + # TODO: albumentations support + if self.augment: + # Albumentations + # some augmentations do not change boxes or masks, + # so this is acceptable for now. + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + masks = torch.flip(masks, dims=[1]) + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + masks = torch.flip(masks, dims=[2]) + + # Cutouts + # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return (torch.from_numpy(img), labels_out, self.img_files[index], shapes, masks) + + @staticmethod + def collate_fn(batch): + img, label, path, shapes, masks = zip(*batch) # transposed + batched_masks = torch.cat(masks, 0) + # print(batched_masks.shape) + # print('batched_masks:', (batched_masks > 0).sum()) + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks + + +# Ancillary functions -------------------------------------------------------------------------------------------------- +def load_image(self, i): + # loads 1 image from dataset index 'i', returns im, original hw, resized hw + im = self.imgs[i] + if im is None: # not cached in ram + npy = self.img_npy[i] + if npy and npy.exists(): # load npy + im = np.load(npy) + else: # read image + path = self.img_files[i] + im = cv2.imread(path) # BGR + assert im is not None, "Image Not Found " + path + h0, w0 = im.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # ratio + if r != 1: # if sizes are not equal + im = cv2.resize( + im, + (int(w0 * r), int(h0 * r)), + interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, + ) + return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized + else: + return ( + self.imgs[i], + self.img_hw0[i], + self.img_hw[i], + ) # im, hw_original, hw_resized + + +def load_neg_image(self, index): + path = self.img_neg_files[index] + img = cv2.imread(path) # BGR + assert img is not None, "Image Not Found " + path + h0, w0 = img.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized + + +def load_bg_image(self, index): + path = self.img_files[index] + bg_path = self.img_bg_files[np.random.randint(0, len(self.img_bg_files))] + img, coord, _, (w, h) = paste1( + path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5) + ) + label = self.labels[index] + label[:, 1] = (label[:, 1] * w + coord[0]) / img.shape[1] + label[:, 2] = (label[:, 2] * h + coord[1]) / img.shape[0] + label[:, 3] = label[:, 3] * w / img.shape[1] + label[:, 4] = label[:, 4] * h / img.shape[0] + + assert img is not None, "Image Not Found " + path + h0, w0 = img.shape[:2] # orig hw + r =
self.img_size / max(h0, w0) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + return img, (h0, w0), img.shape[:2], label # img, hw_original, hw_resized + + +def load_mosaic(self, index, return_seg=False): + # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + + num_neg = random.randint(0, 2) if len(self.img_neg_files) else 0 + # 3 additional image indices + indices = [index] + random.choices(self.indices, k=(3 - num_neg)) + indices = indices + random.choices(range(len(self.img_neg_files)), k=num_neg) + ri = list(range(4)) + random.shuffle(ri) + for j, (i, index) in enumerate(zip(ri, indices)): + temp_label = None + # Load image + # TODO + if j < (4 - num_neg): + if len(self.img_bg_files) and (random.uniform(0, 1) > 0.5): + img, _, (h, w), temp_label = load_bg_image(self, index) + else: + img, _, (h, w) = load_image(self, index) + else: + img, _, (h, w) = load_neg_image(self, index) + # place img in img4 + if j == 0: + img4 = np.full( + (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8 + ) # base image with 4 tiles + if i == 0: # top left + x1a, y1a, x2a, y2a = ( + max(xc - w, 0), + max(yc - h, 0), + xc, + yc, + ) # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = ( + w - (x2a - x1a), + h - (y2a - y1a), + w, + h, + ) # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + # Labels + if j >= (4 - num_neg): + continue + + # TODO: deal with segments + if len(self.img_bg_files) and temp_label is not None: + labels, segments = temp_label, [] + else: + labels, segments = self.labels[index].copy(), self.segments[index].copy() + + if labels.size: + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padw, padh + ) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) + results = random_perspective( + img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border, + area_thr=self.area_thr, + return_seg=return_seg, + ) # border to remove + # return (img4, labels4, segments4) if return_seg else (img4, labels4) + return results + + +def load_mosaic9(self, 
index): + # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic + labels9, segments9 = [], [] + s = self.img_size + indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices + random.shuffle(indices) + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = load_image(self, index) + + # place img in img9 + if i == 0: # center + img9 = np.full( + (s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8 + ) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padx, pady = c[:2] + x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords + + # Labels + labels, segments = self.labels[index].copy(), self.segments[index].copy() + if labels.size: + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padx, pady + ) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padx, pady) for x in segments] + labels9.append(labels) + segments9.extend(segments) + + # Image + img9[y1:y2, x1:x2] = img[y1 - pady :, x1 - padx :] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous + + # Offset + yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y + img9 = img9[yc : yc + 2 * s, xc : xc + 2 * s] + + # Concat/clip labels + labels9 = np.concatenate(labels9, 0) + labels9[:, [1, 3]] -= xc + labels9[:, [2, 4]] -= yc + c = np.array([xc, yc]) # centers + segments9 = [x - c for x in segments9] + + for x in (labels9[:, 1:], *segments9): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img9, labels9 = replicate(img9, labels9) # replicate + + # Augment + img9, labels9 = random_perspective( + img9, + labels9, + segments9, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border, + ) # border to remove + + return img9, labels9 + + +def dataset_stats(path="coco128.yaml", autodownload=False, verbose=False, profile=False, hub=False): + """Return dataset statistics dictionary with images and instances counts per split per class + To run in parent directory: export PYTHONPATH="$PWD/yolov5" + Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) + Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') + Arguments + path: Path to data.yaml or data.zip (with data.yaml inside data.zip) + autodownload: Attempt to download dataset if not found locally + verbose: Print stats dictionary + """ + + def round_labels(labels): + # Update labels to integer class and 6 decimal place floats + return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels] + + def unzip(path): + # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/' + if str(path).endswith(".zip"): # path is data.zip + assert Path(path).is_file(), f"Error unzipping {path}, file not found" + ZipFile(path).extractall(path=path.parent) # 
unzip + dir = path.with_suffix("") # dataset directory == zip name + return ( + True, + str(dir), + next(dir.rglob("*.yaml")), + ) # zipped, data_dir, yaml_path + else: # path is data.yaml + return False, None, path + + def hub_ops(f, max_dim=1920): + # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing + f_new = im_dir / Path(f).name # dataset-hub image filename + try: # use PIL + im = Image.open(f) + r = max_dim / max(im.height, im.width) # ratio + if r < 1.0: # image too large + im = im.resize((int(im.width * r), int(im.height * r))) + im.save(f_new, quality=75) # save + except Exception as e: # use OpenCV + print(f"WARNING: HUB ops PIL failure {f}: {e}") + im = cv2.imread(f) + im_height, im_width = im.shape[:2] + r = max_dim / max(im_height, im_width) # ratio + if r < 1.0: # image too large + im = cv2.resize( + im, + (int(im_width * r), int(im_height * r)), + interpolation=cv2.INTER_LINEAR, + ) + cv2.imwrite(str(f_new), im) + + zipped, data_dir, yaml_path = unzip(Path(path)) + with open(check_yaml(yaml_path), errors="ignore") as f: + data = yaml.safe_load(f) # data dict + if zipped: + data["path"] = data_dir # TODO: should this be dir.resolve()? + check_dataset(data, autodownload) # download dataset if missing + hub_dir = Path(data["path"] + ("-hub" if hub else "")) + stats = {"nc": data["nc"], "names": data["names"]} # statistics dictionary + for split in "train", "val", "test": + if data.get(split) is None: + stats[split] = None # i.e. no test set + continue + x = [] + dataset = LoadImagesAndLabels(data[split]) # load dataset + for label in tqdm(dataset.labels, total=dataset.num_imgs, desc="Statistics"): + x.append(np.bincount(label[:, 0].astype(int), minlength=data["nc"])) + x = np.array(x) # shape(128x80) + stats[split] = { + "instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, + "image_stats": { + "total": dataset.num_imgs, + "unlabelled": int(np.all(x == 0, 1).sum()), + "per_class": (x > 0).sum(0).tolist(), + }, + "labels": [ + {str(Path(k).name): round_labels(v.tolist())} + for k, v in zip(dataset.img_files, dataset.labels) + ], + } + + if hub: + im_dir = hub_dir / "images" + im_dir.mkdir(parents=True, exist_ok=True) + for _ in tqdm( + ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), + total=dataset.num_imgs, + desc="HUB Ops", + ): + pass + + # Profile + stats_path = hub_dir / "stats.json" + if profile: + for _ in range(1): + file = stats_path.with_suffix(".npy") + t1 = time.time() + np.save(file, stats) + t2 = time.time() + x = np.load(file, allow_pickle=True) + print(f"stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") + + file = stats_path.with_suffix(".json") + t1 = time.time() + with open(file, "w") as f: + json.dump(stats, f) # save stats *.json + t2 = time.time() + with open(file, "r") as f: + x = json.load(f) # load hyps dict + print(f"stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") + + # Save, print and return + if hub: + print(f"Saving {stats_path.resolve()}...") + with open(stats_path, "w") as f: + json.dump(stats, f) # save stats.json + if verbose: + print(json.dumps(stats, indent=2, sort_keys=False)) + return stats + + +import os +import glob +import shutil +import hashlib +import uuid +import torch +import cv2 +import numpy as np +import random +from pathlib import Path +from PIL import Image, ImageOps, ExifTags +from utils.segment import segments2boxes +from utils.general import xywh2xyxy + + +# Parameters +HELP_URL = 
"https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data" +IMG_FORMATS = [ + "bmp", + "jpg", + "jpeg", + "png", + "tif", + "tiff", + "dng", + "webp", + "mpo", +] # acceptable image suffixes +VID_FORMATS = [ + "mov", + "avi", + "mp4", + "mpg", + "mpeg", + "m4v", + "wmv", + "mkv", + "vdo", + "flv", +] # acceptable video suffixes +NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads + +# Get orientation exif tag +for orientation in ExifTags.TAGS.keys(): + if ExifTags.TAGS[orientation] == "Orientation": + break + +def get_hash(paths): + # Returns a single hash value of a list of paths (files or dirs) + size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes + h = hashlib.md5(str(size).encode()) # hash sizes + h.update("".join(paths).encode()) # hash paths + return h.hexdigest() # return hash + + +def exif_size(img): + # Returns exif-corrected PIL size + s = img.size # (width, height) + try: + rotation = dict(img._getexif().items())[orientation] + if rotation == 6: # rotation 270 + s = (s[1], s[0]) + elif rotation == 8: # rotation 90 + s = (s[1], s[0]) + except: + pass + + return s + + +def exif_transpose(image): + """ + Transpose a PIL image accordingly if it has an EXIF Orientation tag. + Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() + + :param image: The image to transpose. + :return: An image. + """ + exif = image.getexif() + orientation = exif.get(0x0112, 1) # default 1 + if orientation > 1: + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + if method is not None: + image = image.transpose(method) + del exif[0x0112] + image.info["exif"] = exif.tobytes() + return image + + +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points(Be divided by 2). + """ + img_size = ( + img_size[0] // downsample_ratio, + img_size[1] // downsample_ratio + ) + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) / downsample_ratio + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + return mask + + +def worker_init_reset_seed(worker_id): + seed = uuid.uuid4().int % 2 ** 32 + random.seed(seed) + torch.set_rng_state(torch.manual_seed(seed).get_state()) + np.random.seed(seed) + + +def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points(Be divided by 2). 
+ """ + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = ( + img_size[0] // downsample_ratio, + img_size[1] // downsample_ratio + ) + mask = cv2.resize(mask, (nw, nh)) + return mask + +def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = ( + os.sep + "images" + os.sep, + os.sep + "labels" + os.sep, + ) # /images/, /labels/ substrings + return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths] + + +def create_folder(path="./new"): + # Create folder + if os.path.exists(path): + shutil.rmtree(path) # delete output folder + os.makedirs(path) # make new output folder + + +def flatten_recursive(path="../datasets/coco128"): + # Flatten a recursive directory by bringing all files to top level + new_path = Path(path + "_flat") + create_folder(new_path) + for file in tqdm(glob.glob(str(Path(path)) + "/**/*.*", recursive=True)): + shutil.copyfile(file, new_path / Path(file).name) + +def extract_boxes( + path="../datasets/coco128", +): # from utils.datasets import *; extract_boxes() + # Convert detection dataset into classification dataset, with one directory per class + path = Path(path) # images dir + shutil.rmtree(path / "classifier") if ( + path / "classifier" + ).is_dir() else None # remove existing + files = list(path.rglob("*.*")) + n = len(files) # number of files + for im_file in tqdm(files, total=n): + if im_file.suffix[1:] in IMG_FORMATS: + # image + im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB + h, w = im.shape[:2] + + # labels + lb_file = Path(img2label_paths([str(im_file)])[0]) + if Path(lb_file).exists(): + with open(lb_file, "r") as f: + lb = np.array( + [x.split() for x in f.read().strip().splitlines()], + dtype=np.float32, + ) # labels + + for j, x in enumerate(lb): + c = int(x[0]) # class + f = ( + (path / "classifier") + / f"{c}" + / f"{path.stem}_{im_file.stem}_{j}.jpg" + ) # new filename + if not f.parent.is_dir(): + f.parent.mkdir(parents=True) + + b = x[1:] * [w, h, w, h] # box + # b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.2 + 3 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite( + str(f), im[b[1] : b[3], b[0] : b[2]] + ), f"box failure in {f}" + + +def autosplit( + path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False +): + """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files + Usage: from utils.datasets import *; autosplit() + Arguments + path: Path to images directory + weights: Train, val, test weights (list, tuple) + annotated_only: Only use images with an annotated txt file + """ + path = Path(path) # images dir + files = sorted( + [x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS] + ) # image files only + n = len(files) # number of files + random.seed(0) # for reproducibility + indices = random.choices( + [0, 1, 2], weights=weights, k=n + ) # assign each image to a split + + txt = [ + "autosplit_train.txt", + "autosplit_val.txt", + "autosplit_test.txt", + ] # 3 txt files + [(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing + + print( + f"Autosplitting images from {path}" + + ", using *.txt labeled images only" * annotated_only + ) + for i, img in 
tqdm(zip(indices, files), total=n): + if ( + not annotated_only or Path(img2label_paths([str(img)])[0]).exists() + ): # check label + with open(path.parent / txt[i], "a") as f: + f.write( + "./" + img.relative_to(path.parent).as_posix() + "\n" + ) # add image to txt file + + +def verify_image_label(args): + # Verify one image-label pair + im_file, lb_file, prefix = args + nm, nf, ne, nc, msg, segments = ( + 0, + 0, + 0, + 0, + "", + [], + ) # number (missing, found, empty, corrupt), message, segments + try: + # verify images + im = Image.open(im_file) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels" + assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}" + if im.format.lower() in ("jpg", "jpeg"): + with open(im_file, "rb") as f: + f.seek(-2, 2) + if f.read() != b"\xff\xd9": # corrupt JPEG + ImageOps.exif_transpose(Image.open(im_file)).save( + im_file, "JPEG", subsampling=0, quality=100 + ) + msg = f"{prefix}WARNING: {im_file}: corrupt JPEG restored and saved" + + # verify labels + if os.path.isfile(lb_file): + nf = 1 # label found + with open(lb_file, "r") as f: + l = [x.split() for x in f.read().strip().splitlines() if len(x)] + if any([len(x) > 6 for x in l]): # is segment + classes = np.array([x[0] for x in l], dtype=np.float32) + segments = [ + np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l + ] # (cls, xy1...) + l = np.concatenate( + (classes.reshape(-1, 1), segments2boxes(segments)), 1 + ) # (cls, xywh) + l = np.array(l, dtype=np.float32) + nl = len(l) + if nl: + assert ( + l.shape[1] == 5 + ), f"labels require 5 columns, {l.shape[1]} columns detected" + assert (l >= 0).all(), f"negative label values {l[l < 0]}" + assert ( + l[:, 1:] <= 1 + ).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" + l, idx = np.unique(l, axis=0, return_index=True) # remove duplicate rows + # NOTE: `np.unique` will change the order of `l`, so adjust the segments order too. + segments = [segments[i] for i in idx] if len(segments) > 0 else segments + if len(l) < nl: + msg = f"{prefix}WARNING: {im_file}: {nl - len(l)} duplicate labels removed" + else: + ne = 1 # label empty + l = np.zeros((0, 5), dtype=np.float32) + else: + nm = 1 # label missing + l = np.zeros((0, 5), dtype=np.float32) + return im_file, l, shape, segments, nm, nf, ne, nc, msg + except Exception as e: + nc = 1 + msg = f"{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}" + return [None, None, None, None, nm, nf, ne, nc, msg] + +from torch.utils.data import DataLoader as torchDataLoader + +class DataLoader(torchDataLoader): + """ + Lightnet dataloader that enables on the fly resizing of the images. + See :class:`torch.utils.data.DataLoader` for more information on the arguments. 
+ Check more on the following website: + https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def close_augment(self): + self.batch_sampler.augment = False + + +class InfiniteDataLoader(torchDataLoader): + """Dataloader that reuses workers. + + Uses the same syntax as the vanilla DataLoader. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler)) + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for i in range(len(self)): + yield next(self.iterator) + + +# NEW FILE +from PIL import Image, ImageDraw +import numpy as np +from PIL import ImageFile +# import numbers + +ImageFile.LOAD_TRUNCATED_IMAGES = True + + +def get_raito(new_size, original_size): + """Get the ratio between input_size and original_size""" + # # mmdet way + # iw, ih = new_size + # ow, oh = original_size + # max_long_edge = max(iw, ih) + # max_short_edge = min(iw, ih) + # ratio = min(max_long_edge / max(ow, oh), max_short_edge / min(ow, oh)) + # return ratio + + # # yolov5 way + return min(new_size[0] / original_size[0], new_size[1] / original_size[1]) + +def imresize(img, new_size): + """Resize the image to new_size with PIL, keeping the aspect ratio. + + Args: + img (PIL): The original image. + new_size (tuple): The new size (w, h). + """ + if isinstance(new_size, int): + new_size = (new_size, new_size) + old_size = img.size + ratio = get_raito(new_size, old_size) + img = img.resize((int(old_size[0] * ratio), int(old_size[1] * ratio))) + return img + +def get_wh(a, b): + return np.random.randint(a, b) + + +def paste2(sample1, sample2, background, scale=1.2): + sample1 = Image.open(sample1) + d_w1, d_h1 = sample1.size + + sample2 = Image.open(sample2) + d_w2, d_h2 = sample2.size + + # print(sample.size) + background = Image.open(background) + background = background.resize((int((d_w1 + d_w2) * scale), int((d_h1 + d_h2) * scale))) + bw, bh = background.size + + x1, y1 = get_wh(0, int(d_w1 * scale) - d_w1), get_wh(0, bh - d_h1) + x2, y2 = get_wh(int(d_w1 * scale), bw - d_w2), get_wh(0, bh - d_h2) + # x1, y1 = get_wh(0, int(bw / 2) - d_w1), get_wh(0, bh - d_h1) + # x2, y2 = get_wh(int(bw / 2), bw - d_w2), get_wh(0, bh - d_h2) + + background.paste(sample1, (x1, y1)) + background.paste(sample2, (x2, y2)) + # background = background.resize((416, 416)) + + return np.array(background), (x1, y1, x2, y2), background + # print(background.size) + # background.show() + + +def paste1(sample, background, bg_size, fg_scale=1.5): + sample = Image.open(sample) + background = Image.open(background) + background = imresize(background, bg_size) + bw, bh = background.size + # background = background.resize((int(d_w * scale), int(d_h * scale))) + new_w, new_h = int(bw / fg_scale), int(bh / fg_scale) + sample = imresize(sample, (new_w, new_h)) + + d_w, d_h = sample.size + x1, y1 = get_wh(0, bw - d_w), get_wh(0, bh - d_h) + background.paste(sample, (x1, y1)) + # draw = ImageDraw.Draw(background) + # draw.rectangle((x1 + 240, y1 + 254, x1 + 240 + 5, y1 + 254 + 5), 'red', 'green') + # draw.rectangle((x1 + 80, y1 + 28, x1 + 400, y1 + 480), None, 'green') + # background = background.resize((416, 416)) + + return np.array(background.convert('RGB'))[:, :, ::-1], (x1, y1), background, (d_w, d_h) diff --git a/train_instseg.py b/train_instseg.py new file mode 100644 index
000000000000..ff85f1eb36b5 --- /dev/null +++ b/train_instseg.py @@ -0,0 +1,680 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Train a YOLOv5 model on a custom dataset. + +Models and datasets download automatically from the latest YOLOv5 release. +Models: https://github.com/ultralytics/yolov5/tree/master/models +Datasets: https://github.com/ultralytics/yolov5/tree/master/data +Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data + +Usage: + $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED) + $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch +""" + +import argparse +import math +import os +import random +import sys +import time +from copy import deepcopy +from datetime import datetime +from pathlib import Path + +import numpy as np +import torch +import torch.distributed as dist +import torch.nn as nn +import yaml +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.optim import SGD, Adam, AdamW, lr_scheduler +from tqdm import tqdm + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +import val # for end-of-epoch mAP +from models.experimental import attempt_load +from models.yolo import Model +from utils.autoanchor import check_anchors +from utils.autobatch import check_train_batch_size +from utils.callbacks import Callbacks +from seg_dataloaders import create_dataloader +from utils.downloads import attempt_download +from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, + check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, + increment_path, init_seeds, intersect_dicts, labels_to_class_weights, + labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) +from utils.loggers import Loggers, NewLoggersMask +from utils.loggers.wandb.wandb_utils import check_wandb_resume +from utils.seg_loss import ComputeLoss +#from utils.metrics import fitness +from utils.plots import plot_evolve, plot_labels +from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from evaluator import Yolov5Evaluator + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary + save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ + Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ + opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio + callbacks.run('on_pretrain_routine_start') + + # Directories + w = save_dir / 'weights' #
weights dir + (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir + last, best, last_mosiac = w / 'last.pt', w / 'best.pt', w / "last_mosaic.pt" + + # Hyperparameters + if isinstance(hyp, str): + with open(hyp, errors='ignore') as f: + hyp = yaml.safe_load(f) # load hyps dict + LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + + # Save run settings + if not evolve: + with open(save_dir / 'hyp.yaml', 'w') as f: + yaml.safe_dump(hyp, f, sort_keys=False) + with open(save_dir / 'opt.yaml', 'w') as f: + yaml.safe_dump(vars(opt), f, sort_keys=False) + + # Loggers + data_dict = None + if RANK in {-1, 0}: + newloggers = NewLoggersMask + loggers = newloggers( + save_dir=save_dir, opt=opt, logger=LOGGER + ) # loggers instance + + # Register actions + for k in methods(loggers): + callbacks.register_action(k, callback=getattr(loggers, k)) + + # Config + plots = not evolve and not opt.noplots # create plots + cuda = device.type != 'cpu' + init_seeds(opt.seed + 1 + RANK) + with torch_distributed_zero_first(LOCAL_RANK): + data_dict = data_dict or check_dataset(data) # check if None + train_path, val_path = data_dict['train'], data_dict['val'] + nc = 1 if single_cls else int(data_dict['nc']) # number of classes + names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names + assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check + is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset + + # Model + check_suffix(weights, '.pt') # check weights + pretrained = weights.endswith('.pt') + if pretrained: + with torch_distributed_zero_first(LOCAL_RANK): + weights = attempt_download(weights) # download if not found locally + ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak + model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys + csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 + csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect + model.load_state_dict(csd, strict=False) # load + LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report + else: + model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + amp = check_amp(model) # check AMP + + # Freeze + freeze = [f'model.{x}.' 
for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze + for k, v in model.named_parameters(): + v.requires_grad = True # train all layers + if any(x in k for x in freeze): + LOGGER.info(f'freezing {k}') + v.requires_grad = False + + # Image size + gs = max(int(model.stride.max()), 32) # grid size (max stride) + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple + + # Batch size + if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size + batch_size = check_train_batch_size(model, imgsz, amp) + loggers.on_params_update({"batch_size": batch_size}) + + # Optimizer + nbs = 64 # nominal batch size + accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing + hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay + LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") + evaluator = Yolov5Evaluator( + data = data, + single_cls=single_cls, + save_dir=save_dir, + mask=True, + verbose=False, + mask_downsample_ratio=mask_ratio, + plots=False + ) + g = [], [], [] # optimizer parameter groups + bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d() + for v in model.modules(): + if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias + g[2].append(v.bias) + if isinstance(v, bn): # weight (no decay) + g[1].append(v.weight) + elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) + g[0].append(v.weight) + + if opt.optimizer == 'Adam': + optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + elif opt.optimizer == 'AdamW': + optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + else: + optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + + optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay + optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) + LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " + f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") + del g + + # Scheduler + if opt.cos_lr: + lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] + else: + lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) + + # EMA + ema = ModelEMA(model) if RANK in {-1, 0} else None + + # Resume + start_epoch, best_fitness = 0, 0.0 + if pretrained: + # Optimizer + if ckpt['optimizer'] is not None: + optimizer.load_state_dict(ckpt['optimizer']) + best_fitness = ckpt['best_fitness'] + + # EMA + if ema and ckpt.get('ema'): + ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) + ema.updates = ckpt['updates'] + + # Epochs + start_epoch = ckpt['epoch'] + 1 + if resume: + assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' + if epochs < start_epoch: + LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. 
Fine-tuning for {epochs} more epochs.") + epochs += ckpt['epoch'] # finetune additional epochs + + del ckpt, csd + + # DP mode + if cuda and RANK == -1 and torch.cuda.device_count() > 1: + LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n' + 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') + model = torch.nn.DataParallel(model) + + # SyncBatchNorm + if opt.sync_bn and cuda and RANK != -1: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) + LOGGER.info('Using SyncBatchNorm()') + + # Trainloader + train_loader, dataset = create_dataloader(train_path, + imgsz, + batch_size // WORLD_SIZE, + gs, + single_cls, + hyp=hyp, + augment=True, + cache=None if opt.cache == 'val' else opt.cache, + rect=opt.rect, + rank=LOCAL_RANK, + workers=workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: '), + mask_head=True, + shuffle=True) + mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class + nb = len(train_loader) # number of batches + assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' + + # Process 0 + if RANK in {-1, 0}: + val_loader = create_dataloader(val_path, + imgsz, + batch_size // WORLD_SIZE * 2, + gs, + single_cls, + hyp=hyp, + cache=None if noval else opt.cache, + rect=True, + rank=-1, + workers=workers * 2, + pad=0.5, + mask_head=True, + prefix=colorstr('val: '))[0] + + if not resume: + labels = np.concatenate(dataset.labels, 0) + # c = torch.tensor(labels[:, 0]) # classes + # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency + # model._initialize_biases(cf.to(device)) + if plots: + plot_labels(labels, names, save_dir) + + # Anchors + if not opt.noautoanchor: + check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + model.half().float() # pre-reduce anchor precision + + callbacks.run('on_pretrain_routine_end') + + # DDP mode + if cuda and RANK != -1: + if check_version(torch.__version__, '1.11.0'): + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) + else: + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + + # Model attributes + nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) + hyp['box'] *= 3 / nl # scale to layers + hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers + hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers + hyp['label_smoothing'] = opt.label_smoothing + model.nc = nc # attach number of classes to model + model.hyp = hyp # attach hyperparameters to model + model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.names = names + + # Start training + t0 = time.time() + nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + last_opt_step = -1 + maps = np.zeros(nc) # mAP per class + results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + scheduler.last_epoch = start_epoch - 1 # do not move + scaler = torch.cuda.amp.GradScaler(enabled=amp) + stopper, stop = EarlyStopping(patience=opt.patience), False + compute_loss = ComputeLoss(model) # init loss class + callbacks.run('on_train_start') + LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' + f'Using {train_loader.num_workers * 
WORLD_SIZE} dataloader workers\n' + f"Logging results to {colorstr('bold', save_dir)}\n" + f'Starting training for {epochs} epochs...') + for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + callbacks.run('on_train_epoch_start') + model.train() + + # Update image weights (optional, single-GPU only) + if opt.image_weights: + cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights + dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + + # Update mosaic border (optional) + # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) + # dataset.mosaic_border = [b - imgsz, -b] # height, width borders + + mloss = torch.zeros(4, device=device) # mean losses + if RANK != -1: + train_loader.sampler.set_epoch(epoch) + pbar = enumerate(train_loader) + LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + if RANK in {-1, 0}: + pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar + optimizer.zero_grad() + for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- + callbacks.run('on_train_batch_start') + ni = i + nb * epoch # number integrated batches (since train start) + imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 + + # Warmup + if ni <= nw: + xi = [0, nw] # x interp + # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) + accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) + for j, x in enumerate(optimizer.param_groups): + # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 + x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) + if 'momentum' in x: + x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + + # Multi-scale + if opt.multi_scale: + sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sf = sz / max(imgs.shape[2:]) # scale factor + if sf != 1: + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) + imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + + # Forward + with torch.cuda.amp.autocast(amp): + pred = model(imgs) # forward + loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device)) # loss scaled by batch_size + if RANK != -1: + loss *= WORLD_SIZE # gradient averaged between devices in DDP mode + if opt.quad: + loss *= 4. 
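+                # A minimal worked example of the warmup interpolation above (editorial sketch;
+                # the concrete numbers nw=300, nbs=64, batch_size=16 are illustrative assumptions):
+                #   np.interp(150, [0, 300], [1, 64 / 16]).round()  # -> 2.0
+                # i.e. halfway through warmup the optimizer steps every 2 batches, ramping to
+                # every 4 batches (one nominal 64-image batch) once warmup ends.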
+ + # Backward + scaler.scale(loss).backward() + + # Optimize + if ni - last_opt_step >= accumulate: + scaler.step(optimizer) # optimizer.step + scaler.update() + optimizer.zero_grad() + if ema: + ema.update(model) + last_opt_step = ni + + # Log + if RANK in {-1, 0}: + mloss = (mloss * i + loss_items) / (i + 1) # update mean losses + mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) + pbar.set_description(("%10s" * 2 + "%10.4g" * 6) + % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) + callbacks.run('on_train_batch_end', ni, model, imgs, targets,masks, paths, plots, opt.sync_bn, None) + + if callbacks.stop_training: + return + # end batch ------------------------------------------------------------------------------------------------ + + # Scheduler + lr = [x['lr'] for x in optimizer.param_groups] # for loggers + scheduler.step() + + if RANK in {-1, 0}: + # mAP + callbacks.run('on_train_epoch_end', epoch=epoch) + ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) + final_epoch = (epoch + 1 == epochs) or stopper.possible_stop + if not noval or final_epoch: # Calculate mAP + results, maps, _ = evaluator.run_training( + model=ema.ema, + dataloader=val_loader, + compute_loss=compute_loss, + ) + # Update best mAP + def fitness(x): + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + stop = stopper(epoch=epoch, fitness=fi) # early stop check + if fi > best_fitness: + best_fitness = fi + log_vals = list(mloss) + list(results) + lr + callbacks.run('on_fit_epoch_end', log_vals, epoch) + + # Save model + if (not nosave) or (final_epoch and not evolve): # if save + ckpt = { + 'epoch': epoch, + 'best_fitness': best_fitness, + 'model': deepcopy(de_parallel(model)).half(), + 'ema': deepcopy(ema.ema).half(), + 'updates': ema.updates, + 'optimizer': optimizer.state_dict(), + #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + 'date': datetime.now().isoformat()} + + # Save last, best and delete + torch.save(ckpt, last) + if best_fitness == fi: + torch.save(ckpt, best) + if opt.save_period > 0 and epoch % opt.save_period == 0: + torch.save(ckpt, w / f'epoch{epoch}.pt') + del ckpt + callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) + + # EarlyStopping + if RANK != -1: # if DDP training + broadcast_list = [stop if RANK == 0 else None] + dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks + if RANK != 0: + stop = broadcast_list[0] + if stop: + break # must break all DDP ranks + + # end epoch ---------------------------------------------------------------------------------------------------- + # end training ----------------------------------------------------------------------------------------------------- + if RANK in {-1, 0}: + LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') + for f in last, best: + if f.exists(): + strip_optimizer(f) # strip optimizers + if f is best: + LOGGER.info(f'\nValidating {f}...') + results, _, _ = evaluator.run_training( + model=attempt_load(f, device).half(), + dataloader=val_loader, + compute_loss=compute_loss, + ) # val best model with plots + if is_coco: + callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch) + + callbacks.run('on_train_end', last, best, plots, epoch, results, masks=True) + + 
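+        # A minimal sketch of the local fitness() defined above (editorial; the 8-column
+        # layout of `results` as box metrics followed by mask metrics is an assumption
+        # inferred from the weight vector, not stated in this file):
+        #   results = (P_box, R_box, mAP50_box, mAP50-95_box,
+        #              P_mask, R_mask, mAP50_mask, mAP50-95_mask)
+        #   w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
+        #   (np.array([[0.7, 0.6, 0.65, 0.45, 0.68, 0.58, 0.60, 0.40]]) * w).sum(1)  # -> ~0.89
+        # so only the four mAP terms contribute, with mAP@.5:.95 weighted 9x over mAP@.5.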
+    torch.cuda.empty_cache()
+    return results
+
+
+def parse_opt(known=False):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
+    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
+    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
+    parser.add_argument('--epochs', type=int, default=300)
+    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
+    parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
+    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
+    parser.add_argument('--noplots', action='store_true', help='save no plot files')
+    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
+    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
+    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
+    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
+    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
+    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
+    parser.add_argument('--name', default='exp', help='save to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--quad', action='store_true', help='quad dataloader')
+    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
+    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
+    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
+    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
+    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
+    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
+    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
+    parser.add_argument('--mask-ratio', type=int, default=1, help='mask ratio')
+
+    # Weights & Biases arguments
+    parser.add_argument('--entity', default=None, help='W&B: Entity')
+    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
+    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
+    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
+
+    opt = parser.parse_known_args()[0] if known else parser.parse_args()
+    return opt
+
+
+def main(opt, callbacks=Callbacks()):
+    # Checks
+    if RANK in {-1, 0}:
+        print_args(vars(opt))
+        check_git_status()
+        check_requirements(exclude=['thop'])
+
+    # Resume
+    if opt.resume and not check_wandb_resume(opt) and not opt.evolve:  # resume an interrupted run
+        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
+        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
+        with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
+            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
+        opt.cfg, opt.weights, opt.resume = '', ckpt, True  # reinstate
+        LOGGER.info(f'Resuming training from {ckpt}')
+    else:
+        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
+            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
+        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
+        if opt.evolve:
+            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
+                opt.project = str(ROOT / 'runs/evolve')
+            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
+        if opt.name == 'cfg':
+            opt.name = Path(opt.cfg).stem  # use model.yaml as name
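# parse_opt(known=True) above relies on argparse.parse_known_args, which
# returns the parsed namespace plus any unrecognized tokens instead of
# erroring; this is what lets the run(**kwargs) helper further below override
# options programmatically. A minimal sketch of that behavior:
import argparse
p = argparse.ArgumentParser()
p.add_argument('--imgsz', type=int, default=640)
opt, unknown = p.parse_known_args(['--imgsz', '320', '--not-a-flag'])
print(opt.imgsz, unknown)  # 320 ['--not-a-flag']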
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) + + # DDP mode + device = select_device(opt.device, batch_size=opt.batch_size) + if LOCAL_RANK != -1: + msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' + assert not opt.image_weights, f'--image-weights {msg}' + assert not opt.evolve, f'--evolve {msg}' + assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' + assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + torch.cuda.set_device(LOCAL_RANK) + device = torch.device('cuda', LOCAL_RANK) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + # Train + if not opt.evolve: + train(opt.hyp, opt, device, callbacks) + if WORLD_SIZE > 1 and RANK == 0: + LOGGER.info('Destroying process group... ') + dist.destroy_process_group() + + # Evolve hyperparameters (optional) + else: + # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) + meta = { + 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) + 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) + 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 + 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay + 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) + 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum + 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr + 'box': (1, 0.02, 0.2), # box loss gain + 'cls': (1, 0.2, 4.0), # cls loss gain + 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight + 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) + 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight + 'iou_t': (0, 0.1, 0.7), # IoU training threshold + 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold + 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) + 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) + 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) + 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) + 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) + 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) + 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) + 'scale': (1, 0.0, 0.9), # image scale (+/- gain) + 'shear': (1, 0.0, 10.0), # image shear (+/- deg) + 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 + 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) + 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) + 'mosaic': (1, 0.0, 1.0), # image mixup (probability) + 'mixup': (1, 0.0, 1.0), # image mixup (probability) + 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) + + with open(opt.hyp, errors='ignore') as f: + hyp = yaml.safe_load(f) # load hyps dict + if 'anchors' not in hyp: # anchors commented in hyp.yaml + hyp['anchors'] = 3 + opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch + # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices + evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' + if opt.bucket: + os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists + + for _ in range(opt.evolve): # generations to evolve + if evolve_csv.exists(): # 
if evolve.csv exists: select best hyps and mutate + # Select parent(s) + parent = 'single' # parent selection method: 'single' or 'weighted' + x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) + n = min(5, len(x)) # number of previous results to consider + x = x[np.argsort(-fitness(x))][:n] # top n mutations + w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) + if parent == 'single' or len(x) == 1: + # x = x[random.randint(0, n - 1)] # random selection + x = x[random.choices(range(n), weights=w)[0]] # weighted selection + elif parent == 'weighted': + x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination + + # Mutate + mp, s = 0.8, 0.2 # mutation probability, sigma + npr = np.random + npr.seed(int(time.time())) + g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 + ng = len(meta) + v = np.ones(ng) + while all(v == 1): # mutate until a change occurs (prevent duplicates) + v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) + for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) + hyp[k] = float(x[i + 7] * v[i]) # mutate + + # Constrain to limits + for k, v in meta.items(): + hyp[k] = max(hyp[k], v[1]) # lower limit + hyp[k] = min(hyp[k], v[2]) # upper limit + hyp[k] = round(hyp[k], 5) # significant digits + + # Train mutation + results = train(hyp.copy(), opt, device, callbacks) + callbacks = Callbacks() + # Write mutation results + print_mutation(results, hyp.copy(), save_dir, opt.bucket) + + # Plot results + plot_evolve(evolve_csv) + LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' + f"Results saved to {colorstr('bold', save_dir)}\n" + f'Usage example: $ python train.py --hyp {evolve_yaml}') + + +def run(**kwargs): + # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') + opt = parse_opt(True) + for k, v in kwargs.items(): + setattr(opt, k, v) + main(opt) + return opt + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/utils/general.py b/utils/general.py index a85a2915a31a..178e3073eb76 100755 --- a/utils/general.py +++ b/utils/general.py @@ -24,6 +24,7 @@ from subprocess import check_output from typing import Optional from zipfile import ZipFile +from PIL import ImageFont import cv2 import numpy as np @@ -444,7 +445,7 @@ def check_file(file, suffix=''): assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file - +''' def check_font(font=FONT, progress=False): # Download font to CONFIG_DIR if necessary font = Path(font) @@ -453,7 +454,18 @@ def check_font(font=FONT, progress=False): url = "https://ultralytics.com/assets/" + font.name LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=progress) - +''' +def check_font(font="Arial.ttf", size=10, progress=False): + # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary + font = Path(font) + font = font if font.exists() else (CONFIG_DIR / font.name) + try: + return ImageFont.truetype(str(font) if font.exists() else font.name, size) + except Exception as e: # download if missing + url = "https://ultralytics.com/assets/" + font.name + print(f"Downloading {url} to {font}...") + torch.hub.download_url_to_file(url, str(font), progress=progress) + return ImageFont.truetype(str(font), size) def check_dataset(data, autodownload=True): # Download, check and/or unzip dataset if not found locally diff --git a/utils/loggers/__init__.py 
b/utils/loggers/__init__.py index 42b696ba644f..bf95d82203b8 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -3,6 +3,7 @@ Logging utils """ +from ast import Import import os import warnings @@ -12,7 +13,7 @@ from utils.general import colorstr, cv2, emojis from utils.loggers.wandb.wandb_utils import WandbLogger -from utils.plots import plot_images, plot_results +from utils.plots import plot_images, plot_results, plot_results_with_masks, plot_images_and_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases @@ -157,8 +158,9 @@ def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) - def on_train_end(self, last, best, plots, epoch, results): + def on_train_end(self, last, best, plots, epoch, results, masks=False): # Callback runs on training end + plot_results = plot_results_with_masks if masks else plot_results if plots: plot_results(file=self.save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] @@ -185,3 +187,244 @@ def on_params_update(self, params): # params: A dict containing {param: value} pairs if self.wandb: self.wandb.wandb_run.config.update(params, allow_val_change=True) + +from threading import Thread + +class NewLoggers: + """Loggers without wandb, cause I don't really use `wandb` and `wandb` related codes are noisy.""" + def __init__( + self, + save_dir=None, + opt=None, + logger=None, + include=LOGGERS, + ): + self.save_dir = save_dir + self.opt = opt + self.logger = logger # for printing results to console + self.include = include + self.keys = [ + "train/box_loss", + "train/obj_loss", + "train/cls_loss", # train loss + "metrics/precision", + "metrics/recall", + "metrics/mAP_0.5", + "metrics/mAP_0.5:0.95", # metrics + "val/box_loss", + "val/obj_loss", + "val/cls_loss", # val loss + "x/lr0", + "x/lr1", + "x/lr2", + ] # params + self.best_keys = [ + "best/epoch", + "best/precision", + "best/recall", + "best/mAP_0.5", + "best/mAP_0.5:0.95", + ] + for k in LOGGERS: + setattr(self, k, None) # init empty logger dictionary + self.csv = True # always log to csv + + # TensorBoard + s = self.save_dir + if "tb" in self.include and s.exists(): + prefix = colorstr("TensorBoard: ") + self.logger.info( + f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/" + ) + self.tb = SummaryWriter(str(s)) + try: + import wandb + from wandb import __version__ + wandb.init(project="YOLOv5-Inst-seg", config=opt) + except ImportError: + wandb = None + pass + self.wandb = wandb + + def on_pretrain_routine_end(self): + pass + + def on_train_batch_end( + self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx + ): + # Callback runs on train batch end + if plots and self.save_dir.exists(): + if ni == 0: + if ( + not sync_bn + ): # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") # suppress jit trace warning + self.tb.add_graph( + torch.jit.trace( + de_parallel(model), imgs[0:1], strict=False + ), + [], + ) + if plot_idx is not None and ni in plot_idx: + f = self.save_dir / f"train_batch{ni}.jpg" # filename + Thread( + target=plot_images, args=(imgs, targets, paths, f), daemon=True + 
).start() + # if ni < 3: + # f = self.save_dir / f'train_batch{ni}.jpg' # filename + # Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() + + def on_train_epoch_end(self, epoch): + # Callback runs on train epoch end + pass + + def on_val_image_end(self, imgs, targets, masks, paths): + # Callback runs on val image end + pass + + def on_val_end(self): + # Callback runs on val end + pass + + def on_fit_epoch_end(self, vals, epoch): + # Callback runs at the end of each fit (train+val) epoch + x = {k: v for k, v in zip(self.keys, vals)} # dict + if self.csv and self.save_dir.exists(): + file = self.save_dir / "results.csv" + n = len(x) + 1 # number of cols + s = ( + "" + if file.exists() + else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") + ) # add header + with open(file, "a") as f: + f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") + + if self.tb: + for k, v in x.items(): + self.tb.add_scalar(k, v, epoch) + if self.wandb: + wandb.log(x) + + def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): + # Callback runs on model save event + pass + + def on_train_end(self, plots, epoch, masks=False): + plts = plot_results_with_masks if masks else plot_results + # Callback runs on training end + if plots and self.save_dir.exists(): + plts(file=self.save_dir / "results.csv") # save results.png + files = [ + "results.png", + "confusion_matrix.png", + *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R")), + ] + files = [ + (self.save_dir / f) for f in files if (self.save_dir / f).exists() + ] # filter + + if self.tb: + import cv2 + + for f in files: + self.tb.add_image( + f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats="HWC" + ) + + def on_params_update(self): + # Update hyperparams or configs of the experiment + # params: A dict containing {param: value} pairs + pass + +class NewLoggersMask(NewLoggers): + def __init__( + self, + save_dir=None, + opt=None, + logger=None, + include=LOGGERS, + ): + super().__init__(save_dir, opt, logger, include) + self.keys = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2", + ] # params + self.best_keys = [ + "best/epoch", + "best/precision", + "best/recall", + "best/mAP_0.5", + "best/mAP_0.5:0.95", + ] + + + def on_train_batch_end( + self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx + ): + # Callback runs on train batch end + if plots and self.save_dir.exists(): + if ni == 0: + if ( + not sync_bn + ): # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") # suppress jit trace warning + self.tb.add_graph( + torch.jit.trace( + de_parallel(model), imgs[0:1], strict=False + ), + [], + ) + if plot_idx is not None and ni in plot_idx: + # if ni < 3: + f = self.save_dir / f"train_batch{ni}.jpg" # filename + Thread( + target=plot_images_and_masks, + args=(imgs, targets, masks, paths, f), + daemon=True, + ).start() + if ni==0: + if self.wandb: + res = plot_images_and_masks(imgs, targets, masks, paths) + wandb.log({"train_labels": wandb.Image(res)}) + + + + def on_fit_epoch_end(self, vals, 
epoch): + # Callback runs at the end of each fit (train+val) epoch + x = {k: v for k, v in zip(self.keys, vals)} # dict + if self.csv and self.save_dir.exists(): + file = self.save_dir / "results.csv" + n = len(x) + 1 # number of cols + s = ( + "" + if file.exists() + else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") + ) # add header + with open(file, "a") as f: + f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") + + if self.tb: + for k, v in x.items(): + self.tb.add_scalar(k, v, epoch) + if self.wandb: + wandb.log(x) diff --git a/utils/metrics.py b/utils/metrics.py index e17747b703fa..8646931bed00 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -5,6 +5,7 @@ import math import warnings +from easydict import EasyDict as edict from pathlib import Path import matplotlib.pyplot as plt @@ -12,22 +13,19 @@ import torch -def fitness(x): +def fitness(x, masks=False): # Model fitness as a weighted combination of metrics + if masks: + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] return (x[:, :4] * w).sum(1) -def smooth(y, f=0.05): - # Box filter of fraction f - nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) - p = np.ones(nf // 2) # ones padding - yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded - return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed - - -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): - """ Compute the average precision, given the recall and precision curves. +def ap_per_class( + tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" +): + """Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments tp: True positives (nparray, nx1 or nx10). @@ -35,7 +33,8 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names pred_cls: Predicted object classes (nparray). target_cls: True object classes (nparray). plot: Plot precision-recall curve at mAP@0.5 - save_dir: Plot save directory + save_dir: Plot save directory. + prefix: prefix. # Returns The average precision as computed in py-faster-rcnn. 
""" @@ -45,7 +44,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] # Find unique classes - unique_classes, nt = np.unique(target_cls, return_counts=True) + unique_classes = np.unique(target_cls) nc = unique_classes.shape[0] # number of classes, number of detections # Create Precision-Recall curve and compute AP for each class @@ -53,48 +52,114 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c - n_l = nt[ci] # number of labels + n_l = (target_cls == c).sum() # number of labels n_p = i.sum() # number of predictions + if n_p == 0 or n_l == 0: continue - - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + eps) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + 1e-16) # recall curve + r[ci] = np.interp( + -px, -conf[i], recall[:, 0], left=0 + ) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + eps) - names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data - names = dict(enumerate(names)) # to dict - if plot: - plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) - plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') - plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') - plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') - - i = smooth(f1.mean(0), 0.1).argmax() # max F1 index - p, r, f1 = p[:, i], r[:, i], f1[:, i] - tp = (r * nt).round() # true positives - fp = (tp / (p + eps) - tp).round() # false positives - return tp, fp, p, r, f1, ap, unique_classes.astype(int) + f1 = 2 * p * r / (p + r + 1e-16) + names = [ + v for k, v in names.items() if k in unique_classes + ] # list: only classes that have data + names = {i: v for i, v in enumerate(names)} # to dict + if plot and save_dir is not None: + plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) + plot_mc_curve( + px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" + ) + plot_mc_curve( + px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" + ) + plot_mc_curve( + px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" + ) + + i = f1.mean(0).argmax() # max F1 index + return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") + + +def 
ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. + """ + results_boxes = ap_per_class( + tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box", + ) + results_masks = ap_per_class( + tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask", + ) + + results = edict( + { + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[2], + "f1": results_boxes[3], + "ap_class": results_boxes[4], + }, + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[2], + "f1": results_masks[3], + "ap_class": results_masks[4], + }, + } + ) + return results def compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves + """Compute the average precision, given the recall and precision curves # Arguments recall: The recall curve (list) precision: The precision curve (list) @@ -110,8 +175,8 @@ def compute_ap(recall, precision): mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) # Integrate area under curve - method = 'interp' # methods: 'continuous', 'interp' - if method == 'interp': + method = "interp" # methods: 'continuous', 'interp' + if method == "interp": x = np.linspace(0, 1, 101) # 101-point interp (COCO) ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate else: # 'continuous' @@ -146,7 +211,11 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -156,7 +225,7 @@ def process_batch(self, detections, labels): matches = np.zeros((0, 3)) n = matches.shape[0] > 0 - m0, m1, _ = matches.transpose().astype(int) + m0, m1, _ = matches.transpose().astype(np.int16) for i, gc in enumerate(gt_classes): j = m0 == i if n and sum(j) == 1: @@ -172,91 +241,98 @@ def process_batch(self, detections, labels): def matrix(self): return self.matrix - def tp_fp(self): - tp = self.matrix.diagonal() # true positives - fp = self.matrix.sum(1) - tp # false positives - # fn = self.matrix.sum(0) - tp # false negatives (missed detections) - return tp[:-1], fp[:-1] # remove background class - - def plot(self, normalize=True, save_dir='', names=()): + def plot(self, normalize=True, save_dir="", names=()): try: import seaborn as sn - array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns + array = self.matrix / ( + (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 + ) # normalize columns array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) - nc, nn = self.nc, len(names) # number of classes, names - sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size - labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels + sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size + labels = (0 < len(names) < 99) and len( + names + ) == self.nc # apply names to ticklabels with warnings.catch_warnings(): - warnings.simplefilter('ignore') # suppress empty 
matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap(array, - annot=nc < 30, - annot_kws={ - "size": 8}, - cmap='Blues', - fmt='.2f', - square=True, - vmin=0.0, - xticklabels=names + ['background FP'] if labels else "auto", - yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) - fig.axes[0].set_xlabel('True') - fig.axes[0].set_ylabel('Predicted') - fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) + warnings.simplefilter( + "ignore" + ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap( + array, + annot=self.nc < 30, + annot_kws={"size": 8}, + cmap="Blues", + fmt=".2f", + square=True, + xticklabels=names + ["background FP"] if labels else "auto", + yticklabels=names + ["background FN"] if labels else "auto", + ).set_facecolor((1, 1, 1)) + fig.axes[0].set_xlabel("True") + fig.axes[0].set_ylabel("Predicted") + fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) plt.close() except Exception as e: - print(f'WARNING: ConfusionMatrix plot failure: {e}') + print(f"WARNING: ConfusionMatrix plot failure: {e}") def print(self): for i in range(self.nc + 1): - print(' '.join(map(str, self.matrix[i]))) + print(" ".join(map(str, self.matrix[i]))) -def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4) +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 + box2 = box2.T # Get the coordinates of bounding boxes - if xywh: # transform from xywh to xyxy - (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, 1), box2.chunk(4, 1) - w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 - b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ - b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ - else: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, 1) - b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, 1) - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + if x1y1x2y2: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: # transform from xywh to xyxy + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ - (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + ).clamp(0) # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps union = w1 * h1 + w2 * h2 - inter + eps - # IoU iou = inter / union - if CIoU or DIoU or GIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + if GIoU or DIoU or CIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1 + ) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - 
b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 - if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + rho2 = ( + (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 + ) / 4 # center distance squared + if DIoU: + return iou - rho2 / c2 # DIoU + elif ( + CIoU + ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 + ) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU - return iou - rho2 / c2 # DIoU - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf - return iou # IoU - - -def box_area(box): - # box = xyxy(4,n) - return (box[2] - box[0]) * (box[3] - box[1]) + else: # GIoU https://arxiv.org/pdf/1902.09630.pdf + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU + else: + return iou # IoU def box_iou(box1, box2): @@ -272,28 +348,44 @@ def box_iou(box1, box2): IoU values for every element in boxes1 and boxes2 """ - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) - inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) - - # IoU = inter / (area1 + area2 - inter) - return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter) + def box_area(box): + # box = 4xn + return (box[2] - box[0]) * (box[3] - box[1]) + area1 = box_area(box1.T) + area2 = box_area(box2.T) -def bbox_ioa(box1, box2, eps=1E-7): - """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = ( + ( + torch.min(box1[:, None, 2:], box2[:, 2:]) + - torch.max(box1[:, None, :2], box2[:, :2]) + ) + .clamp(0) + .prod(2) + ) + return inter / ( + area1[:, None] + area2 - inter + ) # iou = inter / (area1 + area2 - inter) + + +def bbox_ioa(box1, box2, eps=1e-7): + """Returns the intersection over box2 area given box1, box2. 
Boxes are x1y1x2y2 box1: np.array of shape(4) box2: np.array of shape(nx4) returns: np.array of shape(n) """ + box2 = box2.transpose() + # Get the coordinates of bounding boxes - b1_x1, b1_y1, b1_x2, b1_y2 = box1 - b2_x1, b2_y1, b2_x2, b2_y2 = box2.T + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] # Intersection area - inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ - (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( + np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) + ).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps @@ -307,49 +399,67 @@ def wh_iou(wh1, wh2): wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) + return inter / ( + wh1.prod(2) + wh2.prod(2) - inter + ) # iou = inter / (area1 + area2 - inter) # Plots ---------------------------------------------------------------------------------------------------------------- -def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()): +def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): # Precision-recall curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) py = np.stack(py, axis=1) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) + ax.plot( + px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" + ) # plot(recall, precision) else: - ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) - - ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) - ax.set_xlabel('Recall') - ax.set_ylabel('Precision') + ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) + + ax.plot( + px, + py.mean(1), + linewidth=3, + color="blue", + label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), + ) + ax.set_xlabel("Recall") + ax.set_ylabel("Precision") ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(save_dir, dpi=250) + fig.savefig(Path(save_dir), dpi=250) plt.close() -def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'): +def plot_mc_curve( + px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" +): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) + ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) else: - ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) - - y = smooth(py.mean(0), 0.05) - ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') + ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) + + y = py.mean(0) + ax.plot( + px, + y, + linewidth=3, + color="blue", + label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", + ) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - 
fig.savefig(save_dir, dpi=250) + fig.savefig(Path(save_dir), dpi=250) plt.close() diff --git a/utils/plots.py b/utils/plots.py index 1bbb9c09c33a..94e59fc8866c 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -487,3 +487,902 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, # cv2.imwrite(f, crop) # https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)).save(f, quality=95, subsampling=0) return crop + + +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Plotting utils +""" + +import math +import os +from copy import copy +from pathlib import Path + +import cv2 +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sn +import torch +from PIL import Image, ImageDraw +from itertools import repeat + +from .metrics import fitness + +# Settings +RANK = int(os.getenv("RANK", -1)) +matplotlib.rc("font", **{"size": 11}) +matplotlib.use("Agg") # for writing to files only + + +class Colors: + # Ultralytics color palette https://ultralytics.com/ + def __init__(self): + # hex = matplotlib.colors.TABLEAU_COLORS.values() + hex = ( + "FF3838", + "FF9D97", + "FF701F", + "FFB21D", + "CFD231", + "48F90A", + "92CC17", + "3DDB86", + "1A9334", + "00D4BB", + "2C99A8", + "00C2FF", + "344593", + "6473FF", + "0018EC", + "8438FF", + "520085", + "CB38FF", + "FF95C8", + "FF37C7", + ) + self.palette = [self.hex2rgb("#" + c) for c in hex] + self.n = len(self.palette) + + def __call__(self, i, bgr=False): + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): # rgb order (PIL) + return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)) + + +colors = Colors() # create instance for 'from utils.plots import colors' + + +class Annotator: + if RANK in (-1, 0): + check_font() # download TTF if necessary + + # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations + def __init__( + self, + im, + line_width=None, + font_size=None, + font="Arial.ttf", + pil=False, + example="abc", + ): + assert ( + im.data.contiguous + ), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." 
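# The contiguity assert above matters because transposed or sliced views are
# not C-contiguous, and the PIL/cv2 drawing paths generally expect contiguous
# memory. A hypothetical caller-side guard (np assumed imported as numpy):
#   im = np.ascontiguousarray(im)   # copies only if im is not already contiguous
#   assert im.data.contiguous       # now always passes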
+ self.pil = pil or not is_ascii(example) + if self.pil: # use PIL + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + self.font = check_font( + font="Arial.Unicode.ttf", + ) + else: # use cv2 + self.im = im + self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width + + def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): + # Add one xyxy box to image with label + if self.pil or not is_ascii(label): + self.draw.rectangle(box, width=self.lw, outline=color) # box + if label: + w, h = self.font.getsize(label) # text width, height + outside = box[1] - h >= 0 # label fits outside box + self.draw.rectangle( + [ + box[0], + box[1] - h if outside else box[1], + box[0] + w + 1, + box[1] + 1 if outside else box[1] + h + 1, + ], + fill=color, + ) + # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 + self.draw.text( + (box[0], box[1] - h if outside else box[1]), + label, + fill=txt_color, + font=self.font, + ) + else: # cv2 + p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) + if label: + tf = max(self.lw - 1, 1) # font thickness + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[ + 0 + ] # text width, height + outside = p1[1] - h - 3 >= 0 # label fits outside box + p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 + cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled + cv2.putText( + self.im, + label, + (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), + 0, + self.lw / 3, + txt_color, + thickness=tf, + lineType=cv2.LINE_AA, + ) + + def rectangle(self, xy, fill=None, outline=None, width=1): + # Add rectangle to image (PIL-only) + self.draw.rectangle(xy, fill, outline, width) + + def text(self, xy, text, txt_color=(255, 255, 255)): + # Add text to image (PIL-only) + w, h = self.font.getsize(text) # text width, height + self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + + def result(self): + # Return annotated image as array + return np.asarray(self.im) + +class Visualizer(object): + """Visualization of one model.""" + def __init__(self, names) -> None: + super().__init__() + self.names = names + + def draw_one_img(self, img, output, vis_conf=0.4): + """Visualize one images. + + Args: + imgs (numpy.ndarray): one image. + outputs (torch.Tensor): one output, (num_boxes, classes+5) + vis_confs (float, optional): Visualize threshold. + Return: + img (numpy.ndarray): Image after visualization. + """ + if isinstance(output, list): + output = output[0] + if output is None or len(output) == 0: + return img + for (*xyxy, conf, cls) in reversed(output[:, :6]): + if conf < vis_conf: + continue + label = '%s %.2f' % (self.names[int(cls)], conf) + color = colors(int(cls)) + plot_one_box(xyxy, img, label=label, + color=color, + line_thickness=2) + return img + + def draw_multi_img(self, imgs, outputs, vis_confs=0.4): + """Visualize multi images. + + Args: + imgs (List[numpy.array]): multi images. + outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. + vis_confs (float | tuple[float], optional): Visualize threshold. + Return: + imgs (List[numpy.ndarray]): Images after visualization. 
+ """ + if isinstance(vis_confs, float): + vis_confs = list(repeat(vis_confs, len(imgs))) + assert len(imgs) == len(outputs) == len(vis_confs) + for i, output in enumerate(outputs): # detections per image + self.draw_one_img(imgs[i], output, vis_confs[i]) + return imgs + + def draw_imgs(self, imgs, outputs, vis_confs=0.4): + if isinstance(imgs, np.ndarray): + return self.draw_one_img(imgs, outputs, vis_confs) + else: + return self.draw_multi_img(imgs, outputs, vis_confs) + + def __call__(self, imgs, outputs, vis_confs=0.4): + return self.draw_imgs(imgs, outputs, vis_confs) + + +def hist2d(x, y, n=100): + # 2d histogram used in labels.png and evolve.png + xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) + hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) + xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) + yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) + return np.log(hist[xidx, yidx]) + + +def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): + from scipy.signal import butter, filtfilt + + # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy + def butter_lowpass(cutoff, fs, order): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + return butter(order, normal_cutoff, btype="low", analog=False) + + b, a = butter_lowpass(cutoff, fs, order=order) + return filtfilt(b, a, data) # forward-backward filter + + +def output_to_target(output): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + targets = [] + for i, o in enumerate(output): + for *box, conf, cls in o.cpu().numpy()[:, :6]: + targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + return np.array(targets) + + +def plot_images( + images, + targets, + paths=None, + fname="images.jpg", + names=None, + max_size=1920, + max_subplots=16, +): + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if np.max(images[0]) <= 1: + images *= 255.0 # de-normalise (optional) + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y : y + h, x : x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text( + (x + 5, y + 5 + h), + text=Path(paths[i]).name[:40], + txt_color=(220, 220, 220), + ) # filenames + if len(targets) > 0: + ti = targets[targets[:, 0] == i] # image targets + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype("int") + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence 
presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" + annotator.box_label(box, label, color=color) + annotator.im.save(fname) # save + return annotator.result() + + +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): + # Plot LR simulating training for full epochs + optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + y = [] + for _ in range(epochs): + scheduler.step() + y.append(optimizer.param_groups[0]["lr"]) + plt.plot(y, ".-", label="LR") + plt.xlabel("epoch") + plt.ylabel("LR") + plt.grid() + plt.xlim(0, epochs) + plt.ylim(0) + plt.savefig(Path(save_dir) / "LR.png", dpi=200) + plt.close() + + +def plot_val_txt(): # from utils.plots import *; plot_val() + # Plot val.txt histograms + x = np.loadtxt("val.txt", dtype=np.float32) + box = xyxy2xywh(x[:, :4]) + cx, cy = box[:, 0], box[:, 1] + + fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) + ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) + ax.set_aspect("equal") + plt.savefig("hist2d.png", dpi=300) + + fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) + ax[0].hist(cx, bins=600) + ax[1].hist(cy, bins=600) + plt.savefig("hist1d.png", dpi=200) + + +def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() + # Plot targets.txt histograms + x = np.loadtxt("targets.txt", dtype=np.float32).T + s = ["x targets", "y targets", "width targets", "height targets"] + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + for i in range(4): + ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % (x[i].mean(), x[i].std())) + ax[i].legend() + ax[i].set_title(s[i]) + plt.savefig("targets.jpg", dpi=200) + + +def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() + # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) + save_dir = Path(file).parent if file else Path(dir) + plot2 = False # plot additional results + if plot2: + ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() + + fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) + # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: + for f in sorted(save_dir.glob("study*.txt")): + y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T + x = np.arange(y.shape[1]) if x is None else np.array(x) + if plot2: + s = [ + "P", + "R", + "mAP@.5", + "mAP@.5:.95", + "t_preprocess (ms/img)", + "t_inference (ms/img)", + "t_NMS (ms/img)", + ] + for i in range(7): + ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) + ax[i].set_title(s[i]) + + j = y[3].argmax() + 1 + ax2.plot( + y[5, 1:j], + y[3, 1:j] * 1e2, + ".-", + linewidth=2, + markersize=8, + label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), + ) + + ax2.plot( + 1e3 / np.array([209, 140, 97, 58, 35, 18]), + [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], + "k.-", + linewidth=2, + markersize=8, + alpha=0.25, + label="EfficientDet", + ) + + ax2.grid(alpha=0.2) + ax2.set_yticks(np.arange(20, 60, 5)) + 
ax2.set_xlim(0, 57) + ax2.set_ylim(25, 55) + ax2.set_xlabel("GPU Speed (ms/img)") + ax2.set_ylabel("COCO AP val") + ax2.legend(loc="lower right") + f = save_dir / "study.png" + print(f"Saving {f}...") + plt.savefig(f, dpi=300) + + +def plot_labels(labels, names=(), save_dir=Path("")): + # plot dataset labels + print("Plotting labels... ") + c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes + nc = int(c.max() + 1) # number of classes + x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) + + # seaborn correlogram + sn.pairplot( + x, + corner=True, + diag_kind="auto", + kind="hist", + diag_kws=dict(bins=50), + plot_kws=dict(pmax=0.9), + ) + plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) + plt.close() + + # matplotlib labels + matplotlib.use("svg") # faster + ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() + y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) + # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 + ax[0].set_ylabel("instances") + if 0 < len(names) < 30: + ax[0].set_xticks(range(len(names))) + ax[0].set_xticklabels(names, rotation=90, fontsize=10) + else: + ax[0].set_xlabel("classes") + sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) + sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) + + # rectangles + labels[:, 1:3] = 0.5 # center + labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 + img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) + for cls, *box in labels[:1000]: + ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot + ax[1].imshow(img) + ax[1].axis("off") + + for a in [0, 1, 2, 3]: + for s in ["top", "right", "left", "bottom"]: + ax[a].spines[s].set_visible(False) + + plt.savefig(save_dir / "labels.jpg", dpi=200) + matplotlib.use("Agg") + plt.close() + + +def profile_idetection(start=0, stop=0, labels=(), save_dir=""): + # Plot iDetection '*.txt' per-image logs. 
from utils.plots import *; profile_idetection() + ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() + s = [ + "Images", + "Free Storage (GB)", + "RAM Usage (GB)", + "Battery", + "dt_raw (ms)", + "dt_smooth (ms)", + "real-world FPS", + ] + files = list(Path(save_dir).glob("frames*.txt")) + for fi, f in enumerate(files): + try: + results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows + n = results.shape[1] # number of rows + x = np.arange(start, min(stop, n) if stop else n) + results = results[:, x] + t = results[0] - results[0].min() # set t0=0s + results[0] = x + for i, a in enumerate(ax): + if i < len(results): + label = labels[fi] if len(labels) else f.stem.replace("frames_", "") + a.plot( + t, + results[i], + marker=".", + label=label, + linewidth=1, + markersize=5, + ) + a.set_title(s[i]) + a.set_xlabel("time (s)") + # if fi == len(files) - 1: + # a.set_ylim(bottom=0) + for side in ["top", "right"]: + a.spines[side].set_visible(False) + else: + a.remove() + except Exception as e: + print("Warning: Plotting error for %s; %s" % (f, e)) + ax[1].legend() + plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) + + +def plot_evolve( + evolve_csv="path/to/evolve.csv", +): # from utils.plots import *; plot_evolve() + # Plot evolve.csv hyp evolution results + evolve_csv = Path(evolve_csv) + data = pd.read_csv(evolve_csv) + keys = [x.strip() for x in data.columns] + x = data.values + f = fitness(x) + j = np.argmax(f) # max fitness index + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc("font", **{"size": 8}) + for i, k in enumerate(keys[7:]): + v = x[:, 7 + i] + mu = v[j] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") + plt.plot(mu, f.max(), "k+", markersize=15) + plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print("%15s: %.3g" % (k, mu)) + f = evolve_csv.with_suffix(".png") # filename + plt.savefig(f, dpi=200) + plt.close() + print(f"Saved {f}") + + +def plot_results(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
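# plot_results reads the results*.csv files written by the loggers; since
# headers are written with "%20s," they carry left padding, so names must be
# stripped before use (as the loop below does). A minimal sketch of the round
# trip, with the file path assumed for illustration:
import pandas as pd
data = pd.read_csv("runs/train/exp/results.csv")
s = [x.strip() for x in data.columns]  # "      train/box_loss" -> "train/box_loss"
print(s[:4])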
+ for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter( + index, + y[index], + color="r", + label=f"best:{index}", + marker="*", + linewidth=3, + ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax( + 0.9 * data.values[:, 8] + + 0.1 * data.values[:, 7] + + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11], + ) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter( + index, + y[index], + color="r", + label=f"best:{index}", + marker="*", + linewidth=3, + ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + import random + + # Plots one bounding box on image img + tl = ( + line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 + ) # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText( + img, + label, + (c1[0], c1[1] - 2), + 0, + tl / 3, + [225, 255, 255], + thickness=tf, + lineType=cv2.LINE_AA, + ) + + +def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): + """ + x: Features to be visualized + module_type: Module type + stage: Module stage within model + n: Maximum number of 
+    save_dir: Directory to save results
+    """
+    if "Detect" not in module_type:
+        batch, channels, height, width = x.shape  # batch, channels, height, width
+        if height > 1 and width > 1:
+            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+
+            blocks = torch.chunk(
+                x[0].cpu(), channels, dim=0
+            )  # select batch index 0, block by channels
+            n = min(n, channels)  # number of plots
+            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
+            ax = ax.ravel()
+            plt.subplots_adjust(wspace=0.05, hspace=0.05)
+            for i in range(n):
+                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+                ax[i].axis("off")
+
+            print(f"Saving {save_dir / f}... ({n}/{channels})")
+            plt.savefig(save_dir / f, dpi=300, bbox_inches="tight")
+            plt.close()
+
+
+def plot_images_and_masks(
+    images,
+    targets,
+    masks,
+    paths=None,
+    fname="images.jpg",
+    names=None,
+    max_size=640,
+    max_subplots=16,
+):
+    # Plot image grid with labels
+    if isinstance(images, torch.Tensor):
+        images = images.cpu().float().numpy()
+    if isinstance(targets, torch.Tensor):
+        targets = targets.cpu().numpy()
+    if isinstance(masks, torch.Tensor):
+        masks = masks.cpu().numpy()
+        masks = masks.astype(int)
+
+    # un-normalise
+    if np.max(images[0]) <= 1:
+        images *= 255
+
+    tl = 3  # line thickness
+    tf = max(tl - 1, 1)  # font thickness
+    bs, _, h, w = images.shape  # batch size, _, height, width
+    bs = min(bs, max_subplots)  # limit plot images
+    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+
+    # Check if we should resize
+    scale_factor = max_size / max(h, w)
+    if scale_factor < 1:
+        h = math.ceil(scale_factor * h)
+        w = math.ceil(scale_factor * w)
+
+    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
+    for i, img in enumerate(images):
+        if i == max_subplots:  # if last batch has fewer images than we expect
+            break
+
+        block_x = int(w * (i // ns))
+        block_y = int(h * (i % ns))
+
+        img = img.transpose(1, 2, 0)
+        if scale_factor < 1:
+            img = cv2.resize(img, (w, h))
+
+        mosaic[block_y : block_y + h, block_x : block_x + w, :] = img
+        if len(targets) > 0:
+            idx = (targets[:, 0]).astype(int)
+            image_targets = targets[idx == i]
+            image_masks = masks[idx == i]
+            boxes = xywh2xyxy(image_targets[:, 2:6]).T
+            classes = image_targets[:, 1].astype("int")
+            labels = image_targets.shape[1] == 6  # labels if no conf column
+            conf = (
+                None if labels else image_targets[:, 6]
+            )  # check for confidence presence (label vs pred)
+
+            if boxes.shape[1]:
+                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
+                    boxes[[0, 2]] *= w  # scale to pixels
+                    boxes[[1, 3]] *= h
+                elif scale_factor < 1:  # absolute coords need scale if image scales
+                    boxes *= scale_factor
+            boxes[[0, 2]] += block_x
+            boxes[[1, 3]] += block_y
+            for j, box in enumerate(boxes.T):
+                cls = int(classes[j])
+                color = colors(cls)
+                cls = names[cls] if names else cls
+                mask = image_masks[j].astype(bool)  # np.bool is deprecated; use builtin bool
+                if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                    label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j])
+                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
+                    mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[
+                        block_y : block_y + h, block_x : block_x + w, :
+                    ][mask] * 0.35 + (np.array(color) * 0.65)
+
+        # Draw image filename labels
+        if paths:
+            label = Path(paths[i]).name[:40]  # trim to 40 char
+            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+            cv2.putText(
+                mosaic,
+                label,
+                (block_x + 5, block_y + t_size[1] + 5),
+                0,
+                tl / 3,
+                [220, 220, 220],
+                thickness=tf,
+                lineType=cv2.LINE_AA,
+            )
+
+        # Image border
+        cv2.rectangle(
+            mosaic,
+            (block_x, block_y),
+            (block_x + w, block_y + h),
+            (255, 255, 255),
+            thickness=3,
+        )
+
+    if fname:
+        r = min(1280.0 / max(h, w) / ns, 1.0)  # ratio to limit image size
+        mosaic = cv2.resize(
+            mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA
+        )
+        # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))  # cv2 save
+        Image.fromarray(mosaic).save(fname)  # PIL save
+    return mosaic
+
+
+def plot_images_boxes_and_masks(
+    images,
+    targets,
+    masks=None,
+    paths=None,
+    fname="images.jpg",
+    names=None,
+    max_size=640,
+    max_subplots=16,
+):
+    if masks is not None:
+        return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots)
+    else:
+        return plot_images(images, targets, paths, fname, names, max_size, max_subplots)
+
+
+def plot_masks(img, masks, colors, alpha=0.5):
+    """
+    Args:
+        img (tensor): img on cuda, shape: [3, h, w], range: [0, 1]
+        masks (tensor): predicted masks on cuda, shape: [n, h, w]
+        colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
+    Return:
+        img after drawing masks, shape: [h, w, 3]
+
+    Transforming the colors and moving img_gpu to the CPU take most of the time.
+    """
+    img_gpu = img.clone()
+    num_masks = len(masks)
+    # [n, 1, 1, 3]; building the color tensor this way is faster
+    colors = torch.tensor(colors, device=img.device).float() / 255.0
+    colors = colors[:, None, None, :]
+    # [n, h, w, 1]
+    masks = masks[:, :, :, None]
+    masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha
+    inv_alph_masks = masks * (-alpha) + 1
+    masks_color_summand = masks_color[0]
+    if num_masks > 1:
+        inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0)
+        masks_color_cumul = masks_color[1:] * inv_alph_cumul
+        masks_color_summand += masks_color_cumul.sum(dim=0)
+
+    img_gpu = img_gpu.flip(dims=[0])  # flip channels for OpenCV (RGB -> BGR)
+    img_gpu = img_gpu.permute(1, 2, 0).contiguous()
+    # [h, w, 3]
+    img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
+    return (img_gpu * 255).byte().cpu().numpy()
+
+def visualize(self, images, outputs, out_masks, vis_confs=0.4):
+    """Image visualize.
+    If images is a list of ndarrays, a list is returned; if it is a single
+    ndarray, an ndarray is returned.
+    Args:
+        outputs: bbox+conf+cls, List[torch.Tensor(num_boxes, 6)]xB.
+        out_masks: binary masks, List[torch.Tensor(num_boxes, img_h, img_w)]xB.
+    """
+    ori_type = type(images)
+    # get original shapes, since self.ori_hw will be cleared
+    images = images if isinstance(images, list) else [images]
+    ori_hw = [img.shape[:2] for img in images]
+    # init the list to keep images with masks.
+    # TODO: fix this bug when output is empty.
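+    # Per-image flow below (sketch): keep detections with conf > vis_confs, pick
+    # one color per class, alpha-blend masks onto the image via plot_masks(),
+    # then rescale the result back to the original image size with scale_masks().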
+ masks_images = [] + # draw masks + for i, output in enumerate(outputs): + if output is None or len(output) == 0: + continue + idx = output[:, 4] > vis_confs + masks = out_masks[i][idx] + mcolors = [colors(int(cls)) for cls in output[:, 5]] + # NOTE: this way to draw masks is faster, + # from https://github.com/dbolya/yolact + # image with masks, (img_h, img_w, 3) + img_masks = plot_masks(self.imgs[i], masks, mcolors) + # scale image to original hw + from utils.segment import scale_masks + img_masks = scale_masks(self.imgs[i].shape[1:], img_masks, ori_hw[i]) + masks_images.append(img_masks) + # TODO: make this(ori_type stuff) clean + images = masks_images[0] if (len(masks_images) == 1) and type(masks_images) != ori_type else images[0] + return self.vis(images, outputs, vis_confs) \ No newline at end of file diff --git a/utils/seg_loss.py b/utils/seg_loss.py new file mode 100644 index 000000000000..d4cf26401bc6 --- /dev/null +++ b/utils/seg_loss.py @@ -0,0 +1,459 @@ +import torch +from utils.torch_utils import de_parallel, is_parallel +from utils.general import xywh2xyxy +from utils.segment import mask_iou, masks_iou, crop +import torch.nn.functional as F +import torch.nn as nn +from utils.loss import smooth_BCE, FocalLoss + + +class ComputeLoss: + # Compute losses + def __init__(self, model, autobalance=False): + self.sort_obj_iou = False + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["obj_pw"]], device=device)) + + self.mask_loss = MaskIOULoss() + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE( + eps=h.get("label_smoothing", 0.0) + ) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = ( + BCEcls, + BCEobj, + 1.0, + h, + autobalance, + ) + for k in "na", "nc", "nl", "anchors", "nm": + if hasattr(det, k): + setattr(self, k, getattr(det, k)) + + def __call__(self, p, targets, masks=None): # predictions, targets, model + if masks is not None: + return self.loss_segment(p, targets, masks) + return self.loss_detection(p, targets) + + def loss_detection(self, p, targets): + device = targets.device + lcls, lbox, lobj = ( + torch.zeros(1, device=device), + torch.zeros(1, device=device), + torch.zeros(1, device=device), + ) + tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets + + # Losses + for i, pi in enumerate(p): # layer index, layer predictions + b, a, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros_like(pi[..., 0], device=device) # target obj + + n = b.shape[0] # number of targets + if n: + ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + + # Regression + pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 + pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pbox = torch.cat((pxy, pwh), 1) # predicted box + iou = bbox_iou( + pbox.T, tbox[i], x1y1x2y2=False, CIoU=True + ) # iou(prediction, target) + lbox += (1.0 - iou).mean() # iou loss + + # 
Objectness
+                score_iou = iou.detach().clamp(0).type(tobj.dtype)
+                if self.sort_obj_iou:
+                    sort_id = torch.argsort(score_iou)
+                    b, a, gj, gi, score_iou = (
+                        b[sort_id],
+                        a[sort_id],
+                        gj[sort_id],
+                        gi[sort_id],
+                        score_iou[sort_id],
+                    )
+                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio
+
+                # Classification
+                if self.nc > 1:  # cls loss (only if multiple classes)
+                    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
+                    t[range(n), tcls[i]] = self.cp
+                    lcls += self.BCEcls(ps[:, 5:], t)  # BCE
+
+                # Append targets to text file
+                # with open('targets.txt', 'a') as file:
+                #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
+
+            obji = self.BCEobj(pi[..., 4], tobj)
+            lobj += obji * self.balance[i]  # obj loss
+            if self.autobalance:
+                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
+
+        if self.autobalance:
+            self.balance = [x / self.balance[self.ssi] for x in self.balance]
+        lbox *= self.hyp["box"]
+        lobj *= self.hyp["obj"]
+        lcls *= self.hyp["cls"]
+        bs = tobj.shape[0]  # batch size
+
+        return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
+
+    def loss_segment(self, preds, targets, masks):
+        """
+        proto_out: [batch_size, mask_dim, mask_height, mask_width]
+        masks: [batch_size * num_objs, image_height, image_width]
+        The number of objects differs per image, so images with fewer
+        objects are padded when the masks are processed.
+        """
+        p = preds[0]
+        proto_out = preds[1]
+        mask_h, mask_w = proto_out.shape[2:]
+        proto_out = proto_out.permute(0, 2, 3, 1)
+
+        device = targets.device
+        lcls, lbox, lobj, lseg = (
+            torch.zeros(1, device=device),
+            torch.zeros(1, device=device),
+            torch.zeros(1, device=device),
+            torch.zeros(1, device=device),
+        )
+        tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks(
+            p, targets
+        )  # targets
+
+        # Losses
+        for i, pi in enumerate(p):  # layer index, layer predictions
+            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
+            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
+
+            n = b.shape[0]  # number of targets
+            if n:
+                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
+
+                # Regression
+                pxy = ps[:, :2].sigmoid() * 2.0 - 0.5
+                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
+                pbox = torch.cat((pxy, pwh), 1)  # predicted box
+                iou = bbox_iou(
+                    pbox.T, tbox[i], x1y1x2y2=False, CIoU=True
+                )  # iou(prediction, target)
+                lbox += (1.0 - iou).mean()  # iou loss
+
+                # Objectness
+                score_iou = iou.detach().clamp(0).type(tobj.dtype)
+                if self.sort_obj_iou:
+                    sort_id = torch.argsort(score_iou)
+                    b, a, gj, gi, score_iou = (
+                        b[sort_id],
+                        a[sort_id],
+                        gj[sort_id],
+                        gi[sort_id],
+                        score_iou[sort_id],
+                    )
+                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio
+
+                # Classification
+                if self.nc > 1:  # cls loss (only if multiple classes)
+                    t = torch.full_like(ps[:, self.nm :], self.cn, device=device)  # targets
+                    t[range(n), tcls[i]] = self.cp
+                    lcls += self.BCEcls(ps[:, self.nm :], t)  # BCE
+
+                # Mask Regression
+                mask_gt = masks[tidxs[i]]
+                downsampled_masks = F.interpolate(
+                    mask_gt[None, :],
+                    (mask_h, mask_w),
+                    mode="bilinear",
+                    align_corners=False,
+                ).squeeze(0)
+
+                mxywh = xywh[i]
+                mws, mhs = mxywh[:, 2:].T
+                mws, mhs = mws / pi.shape[3], mhs / pi.shape[2]
+                mxywhs = (
+                    mxywh
+                    / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]]
+                    * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)
+                )
+                mxyxys = xywh2xyxy(mxywhs)
+
+                batch_lseg = torch.zeros(1, device=device)
+                for bi in b.unique():
+                    index = b == bi
+                    mask_gti = downsampled_masks[index]
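+                    # (n_obj, mask_h, mask_w) -> (mask_h, mask_w, n_obj): matches the
+                    # (h, w, mask_dim) layout of proto_out[bi] for the matmul in single_mask_loss()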
+                    mask_gti = mask_gti.permute(1, 2, 0).contiguous()
+
+                    mw, mh = mws[index], mhs[index]
+                    mxyxy = mxyxys[index]
+                    psi = ps[index][:, 5 : self.nm]
+                    proto = proto_out[bi]
+
+                    batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh)
+                lseg += batch_lseg / len(b.unique())
+
+            obji = self.BCEobj(pi[..., 4], tobj)
+            lobj += obji * self.balance[i]  # obj loss
+            if self.autobalance:
+                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
+
+        if self.autobalance:
+            self.balance = [x / self.balance[self.ssi] for x in self.balance]
+        lbox *= self.hyp["box"]
+        lobj *= self.hyp["obj"]
+        lcls *= self.hyp["cls"]
+        lseg *= self.hyp["box"]
+        bs = tobj.shape[0]  # batch size
+
+        loss = lbox + lobj + lcls + lseg
+        return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
+
+    def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h):
+        """Mask loss for a single image."""
+        # (80, 80, 32) @ (32, n) -> (80, 80, n)
+        pred_mask = proto @ pred.tanh().T
+        lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
+        lseg = crop(lseg, xyxy)
+        lseg = lseg.mean(dim=(0, 1)) / w / h
+        return lseg.mean()
+
+    def mask_loss(self, gt_masks, preds, protos, xyxys, ws, hs):
+        """Batched mask loss (not implemented; note that the instance attribute
+        self.mask_loss assigned in __init__ shadows this method)."""
+        pass
+
+    def build_targets(self, p, targets):
+        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
+        na, nt = self.na, targets.shape[0]  # number of anchors, targets
+        tcls, tbox, indices, anch = [], [], [], []
+        gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
+        ai = (
+            torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
+        )  # same as .repeat_interleave(nt)
+        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices
+
+        g = 0.5  # bias
+        off = (
+            torch.tensor(
+                [
+                    [0, 0],
+                    [1, 0],
+                    [0, 1],
+                    [-1, 0],
+                    [0, -1],  # j,k,l,m
+                    # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
+                ],
+                device=targets.device,
+            ).float()
+            * g
+        )  # offsets
+
+        for i in range(self.nl):
+            anchors = self.anchors[i]
+            gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
+
+            # Match targets to anchors
+            t = targets * gain
+            if nt:
+                # Matches
+                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
+                j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"]  # compare
+                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
+                t = t[j]  # filter
+
+                # Offsets
+                gxy = t[:, 2:4]  # grid xy
+                gxi = gain[[2, 3]] - gxy  # inverse
+                j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T
+                l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T
+                j = torch.stack((torch.ones_like(j), j, k, l, m))
+                t = t.repeat((5, 1, 1))[j]
+                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
+            else:
+                t = targets[0]
+                offsets = 0
+
+            # Define
+            b, c = t[:, :2].long().T  # image, class
+            gxy = t[:, 2:4]  # grid xy
+            gwh = t[:, 4:6]  # grid wh
+            gij = (gxy - offsets).long()
+            gi, gj = gij.T  # grid xy indices
+
+            # Append
+            a = t[:, 6].long()  # anchor indices
+            indices.append(
+                (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))
+            )  # image, anchor, grid indices
+            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
+            anch.append(anchors[a])  # anchors
+            tcls.append(c)  # class
+
+        return tcls, tbox, indices, anch
+
+    def build_targets_for_masks(self, p, targets):
+        # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
+        na, nt = self.na, targets.shape[0]  # number of anchors, targets
+        tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], []
+        gain = torch.ones(8,
device=targets.device) # normalized to gridspace gain + ai = ( + torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) + ) # same as .repeat_interleave(nt) + ti = ( + torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) + ) # same as .repeat_interleave(nt) + + targets = torch.cat( + (targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2 + ) # append anchor indices + + g = 0.5 # bias + off = ( + torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device, + ).float() + * g + ) # offsets + + for i in range(self.nl): + anchors = self.anchors[i] + gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain + if nt: + # Matches + r = t[:, :, 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"] # compare + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) + t = t[j] # filter + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T + l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) + t = t.repeat((5, 1, 1))[j] + offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + else: + t = targets[0] + offsets = 0 + + # Define + b, c = t[:, :2].long().T # image, class + gxy = t[:, 2:4] # grid xy + gwh = t[:, 4:6] # grid wh + gij = (gxy - offsets).long() + gi, gj = gij.T # grid xy indices + + # Append + a = t[:, 6].long() # anchor indices + tidx = t[:, 7].long() + indices.append( + (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)) + ) # image, anchor, grid indices + tbox.append(torch.cat((gxy - gij, gwh), 1)) # box + anch.append(anchors[a]) # anchors + tcls.append(c) # class + tidxs.append(tidx) + xywh.append(torch.cat((gxy, gwh), 1)) + + return tcls, tbox, indices, anch, tidxs, xywh + + +class MaskIOULoss(nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, pred_mask, gt_mask, mxyxy=None): + """ + Args: + pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) + gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) + mxyxy (torch.Tensor): ground truth of boxes, (n, 4) + """ + _, _, n = pred_mask.shape # same as gt_mask + pred_mask = pred_mask.sigmoid() + if mxyxy is not None: + pred_mask = crop(pred_mask, mxyxy) + gt_mask = crop(gt_mask, mxyxy) + pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) + gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) + iou = masks_iou(pred_mask, gt_mask) + return 1.0 - iou + +import math + +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 + box2 = box2.T + + # Get the coordinates of bounding boxes + if x1y1x2y2: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: # transform from xywh to xyxy + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + # Intersection area + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + ).clamp(0) + + # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + iou = inter / union + if GIoU or DIoU or CIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1 + ) # convex (smallest enclosing box) width + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ( + (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 + ) / 4 # center distance squared + if DIoU: + return iou - rho2 / c2 # DIoU + elif ( + CIoU + ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 + ) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) # CIoU + else: # GIoU https://arxiv.org/pdf/1902.09630.pdf + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU + else: + return iou # IoU \ No newline at end of file diff --git a/utils/segment.py b/utils/segment.py new file mode 100644 index 000000000000..89f6627a6259 --- /dev/null +++ b/utils/segment.py @@ -0,0 +1,318 @@ +import numpy as np +import time +import cv2 +import torch.nn.functional as F +import torch +import torchvision +from .general import xyxy2xywh, xywh2xyxy +from .metrics import box_iou + +def segment2box(segment, width=640, height=640): + # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) + x, y = segment.T # segment xy + inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) + x, y, = ( + x[inside], + y[inside], + ) + return ( + np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) + ) # xyxy + + +def segments2boxes(segments): + # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) 
to (cls, xywh)
+    boxes = []
+    for s in segments:
+        x, y = s.T  # segment xy
+        boxes.append([x.min(), y.min(), x.max(), y.max()])  # xyxy
+    return xyxy2xywh(np.array(boxes))  # xywh
+
+
+def resample_segments(segments, n=1000):
+    # Up-sample an (n,2) segment
+    for i, s in enumerate(segments):
+        x = np.linspace(0, len(s) - 1, n)
+        xp = np.arange(len(s))
+        segments[i] = (
+            np.concatenate([np.interp(x, xp, s[:, j]) for j in range(2)])
+            .reshape(2, -1)
+            .T
+        )  # segment xy
+    return segments
+
+def non_max_suppression_masks(
+    prediction,
+    conf_thres=0.25,
+    iou_thres=0.45,
+    classes=None,
+    agnostic=False,
+    multi_label=False,
+    labels=(),
+    max_det=300,
+    mask_dim=32,
+):
+    """Runs Non-Maximum Suppression (NMS) on inference results with mask coefficients
+
+    Returns:
+        list of detections, one (n, 6 + mask_dim) tensor per image [xyxy, conf, cls, mask coeffs]
+    """
+
+    nc = prediction.shape[2] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert (
+        0 <= conf_thres <= 1
+    ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
+    assert (
+        0 <= iou_thres <= 1
+    ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
+
+    # Settings
+    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 10.0  # seconds to quit after
+    redundant = True  # require redundant detections
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+    merge = False  # use merge-NMS
+    nm = 5 + mask_dim
+
+    t = time.time()
+    output = [
+        torch.zeros((0, 6 + mask_dim), device=prediction.device)
+    ] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+        pred_masks = x[:, 5:nm]
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            lb = labels[xi]
+            v = torch.zeros((len(lb), nc + 5), device=x.device)
+            v[:, :4] = lb[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
+            # NOTE: v has no mask-coefficient columns, so this cat fails when labels are supplied
+            x = torch.cat((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        # NOTE: pred_masks is a view of x, so this in-place multiply also scales the mask coefficients
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx(6 + mask_dim) (xyxy, conf, cls, mask coeffs)
+        if multi_label:
+            i, j = (x[:, nm:] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat(
+                (box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1
+            )
+        else:  # best class only
+            conf, j = x[:, nm:].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float(), pred_masks), 1)[
+                conf.view(-1) > conf_thres
+            ]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Apply finite constraint
+        # if not torch.isfinite(x).all():
+        #     x = x[torch.isfinite(x).all(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
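+            # NOTE: merge is hard-coded to False above, so this Merge-NMS branch
+            # (IoU-and-score-weighted box averaging) is effectively disabled.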
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
+                1, keepdim=True
+            )  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(f"WARNING: NMS time limit {time_limit}s exceeded")
+            break  # time limit exceeded
+
+    return output
+
+def crop(masks, boxes):
+    """
+    "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+    Vectorized by Chong (thanks Chong).
+
+    Args:
+        - masks should be a size [h, w, n] tensor of masks
+        - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+    """
+    h, w, n = masks.size()
+    x1, x2 = boxes[:, 0], boxes[:, 2]
+    y1, y2 = boxes[:, 1], boxes[:, 3]
+
+    rows = (
+        torch.arange(w, device=masks.device, dtype=x1.dtype)
+        .view(1, -1, 1)
+        .expand(h, w, n)
+    )
+    cols = (
+        torch.arange(h, device=masks.device, dtype=x1.dtype)
+        .view(-1, 1, 1)
+        .expand(h, w, n)
+    )
+
+    # (h, w, n) grids compared against (1, 1, n) box edges, broadcast per mask
+    masks_left = rows >= x1.view(1, 1, -1)
+    masks_right = rows < x2.view(1, 1, -1)
+    masks_up = cols >= y1.view(1, 1, -1)
+    masks_down = cols < y2.view(1, 1, -1)
+
+    # (h, w, n)
+    crop_mask = masks_left * masks_right * masks_up * masks_down
+
+    return masks * crop_mask.float()
+
+def process_mask_upsample(proto_out, out_masks, bboxes, shape):
+    """
+    Crop after upsampling.
+    proto_out: [mask_dim, mask_h, mask_w]
+    out_masks: [n, mask_dim], n is the number of masks after NMS
+    bboxes: [n, 4], n is the number of masks after NMS
+    shape: input image size, (h, w)
+
+    return: h, w, n
+    """
+    # mask_h, mask_w, n
+    masks = proto_out.float().permute(
+        1, 2, 0).contiguous() @ out_masks.float().tanh().T
+    masks = masks.sigmoid()
+    masks = masks.permute(2, 0, 1).contiguous()
+    # [n, mask_h, mask_w]
+    masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
+    # [mask_h, mask_w, n]
+    masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes)
+    return masks.gt_(0.5)
+
+def process_mask(proto_out, out_masks, bboxes, shape, upsample=False):
+    """
+    Crop before upsampling.
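+
+    Usage sketch (illustrative, not from this patch; assumes det is one image's
+    detections from non_max_suppression_masks, laid out as [xyxy, conf, cls, coeffs]):
+        masks = process_mask(proto_out[i], det[:, 6:], det[:, :4], img.shape[2:])
+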
+    proto_out: [mask_dim, mask_h, mask_w]
+    out_masks: [n, mask_dim], n is the number of masks after NMS
+    bboxes: [n, 4], n is the number of masks after NMS
+    shape: input image size, (h, w)
+
+    return: h, w, n
+    """
+    downsampled_bboxes = bboxes.clone()
+    mh, mw = proto_out.shape[1:]
+    ih, iw = shape
+    # mask_h, mask_w, n
+    masks = proto_out.float().permute(
+        1, 2, 0).contiguous() @ out_masks.float().tanh().T
+    masks = masks.sigmoid()
+    downsampled_bboxes[:, 0] = downsampled_bboxes[:, 0] / iw * mw
+    downsampled_bboxes[:, 2] = downsampled_bboxes[:, 2] / iw * mw
+    downsampled_bboxes[:, 1] = downsampled_bboxes[:, 1] / ih * mh
+    downsampled_bboxes[:, 3] = downsampled_bboxes[:, 3] / ih * mh
+    masks = crop(masks, downsampled_bboxes)
+    masks = masks.permute(2, 0, 1).contiguous()
+    # [n, mask_h, mask_w]
+    if upsample:
+        masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
+    return masks.gt_(0.5).permute(1, 2, 0).contiguous()
+
+def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None):
+    """
+    img1_shape: model input shape, [h, w]
+    img0_shape: original image shape, [h, w, 3]
+    masks: [h, w, num]
+    Resizing takes most of the time.
+    """
+    # Rescale coords (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0],
+                   img1_shape[1] / img0_shape[1])  # gain = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (
+            img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0][0]
+        pad = ratio_pad[1]
+    tl_pad = int(pad[1]), int(pad[0])  # y, x
+    br_pad = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0])
+
+    if len(masks.shape) < 2:
+        raise ValueError(f'masks should have 2 or 3 dimensions, but got {len(masks.shape)}')
+    # masks_h, masks_w, n
+    masks = masks[tl_pad[0]:br_pad[0], tl_pad[1]:br_pad[1]]
+    # masks_h, masks_w, n
+    masks = cv2.resize(masks, (img0_shape[1], img0_shape[0]))
+
+    # keepdim
+    if len(masks.shape) == 2:
+        masks = masks[:, :, None]
+
+    return masks
+
+def mask_iou(mask1, mask2):
+    """
+    mask1: [N, n], N is the number of predicted objects
+    mask2: [M, n], M is the number of ground-truth objects
+    Note: n means image_w x image_h
+
+    return: masks iou, [N, M]
+    """
+    intersection = torch.matmul(mask1, mask2.t()).clamp(0)
+    area1 = torch.sum(mask1, dim=1).view(1, -1)
+    area2 = torch.sum(mask2, dim=1).view(1, -1)
+    union = (area1.t() + area2) - intersection
+
+    return intersection / (union + 1e-7)
+
+def masks_iou(mask1, mask2):
+    """
+    mask1: [N, n], N is the number of predicted objects
+    mask2: [N, n], N is the number of ground-truth objects
+    Note: n means image_w x image_h
+
+    return: masks iou, (N, )
+    """
+    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
+    area1 = torch.sum(mask1, dim=1)  # (N, ); no .view(1, -1), which would give a (1, N) result
+    area2 = torch.sum(mask2, dim=1)
+    union = (area1 + area2) - intersection
+    return intersection / (union + 1e-7)
\ No newline at end of file

From e151ee177750a5568d872cb346128567d5d5ff27 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Mon, 11 Jul 2022 13:12:08 +0530
Subject: [PATCH 002/247] deterministic

---
 train_instseg.py | 2 +-
utils/loggers/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train_instseg.py b/train_instseg.py index ff85f1eb36b5..825edc6b7415 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -113,7 +113,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Config plots = not evolve and not opt.noplots # create plots cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK) + init_seeds(opt.seed + 1 + RANK, True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index bf95d82203b8..65c673c64498 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -240,7 +240,7 @@ def __init__( try: import wandb from wandb import __version__ - wandb.init(project="YOLOv5-Inst-seg", config=opt) + wandb.init(project=opt.project, name=opt.name, config=opt) except ImportError: wandb = None pass From bcb5bcb617917c89d73665f679eb1e4507b88d5c Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 15:58:38 +0530 Subject: [PATCH 003/247] allow mask_ratio --- train_instseg.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index 825edc6b7415..ad7ced91e4c2 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -29,6 +29,7 @@ import torch.nn as nn import yaml from torch.nn.parallel import DistributedDataParallel as DDP +import torch.nn.functional as F from torch.optim import SGD, Adam, AdamW, lr_scheduler from tqdm import tqdm @@ -253,7 +254,9 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio quad=opt.quad, prefix=colorstr('train: '), mask_head=True, - shuffle=True) + shuffle=True, + mask_downsample_ratio=mask_ratio + ) mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. 
Possible class labels are 0-{nc - 1}' @@ -272,6 +275,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio workers=workers * 2, pad=0.5, mask_head=True, + mask_downsample_ratio=mask_ratio, prefix=colorstr('val: '))[0] if not resume: @@ -396,6 +400,14 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) pbar.set_description(("%10s" * 2 + "%10.4g" * 6) % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) + # for plots + if mask_ratio != 1: + masks = F.interpolate( + masks[None, :], + (imgsz, imgsz), + mode="bilinear", + align_corners=False, + ).squeeze(0) callbacks.run('on_train_batch_end', ni, model, imgs, targets,masks, paths, plots, opt.sync_bn, None) if callbacks.stop_training: From 472a45015843029c80a39af98e0cbb65ef8e72af Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:41:03 +0530 Subject: [PATCH 004/247] attempt class renaming --- train_instseg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train_instseg.py b/train_instseg.py index ad7ced91e4c2..314c18be2c75 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -45,7 +45,7 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from seg_dataloaders import create_dataloader +from seg_dataloaders import create_dataloader, create_dataloader_ori from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -239,7 +239,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio LOGGER.info('Using SyncBatchNorm()') # Trainloader - train_loader, dataset = create_dataloader(train_path, + train_loader, dataset = create_dataloader_ori(train_path, imgsz, batch_size // WORLD_SIZE, gs, @@ -263,7 +263,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Process 0 if RANK in {-1, 0}: - val_loader = create_dataloader(val_path, + val_loader = create_dataloader_ori(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, From 9871073b37127e5cf1b3bd799cdd560239674edc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:42:43 +0530 Subject: [PATCH 005/247] attempt class --- data/coco.yaml | 4 +++- train_instseg.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/data/coco.yaml b/data/coco.yaml index 0c0c4adab05d..c07c27816796 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -14,7 +14,8 @@ val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 # Classes -nc: 80 # number of classes +nc: 91 # number of classes +''' names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', @@ -24,6 +25,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] # class names +''' # Download script/URL (optional) diff --git a/train_instseg.py b/train_instseg.py index 314c18be2c75..ad7ced91e4c2 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -45,7 +45,7 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from seg_dataloaders import create_dataloader, create_dataloader_ori +from seg_dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -239,7 +239,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio LOGGER.info('Using SyncBatchNorm()') # Trainloader - train_loader, dataset = create_dataloader_ori(train_path, + train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, @@ -263,7 +263,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Process 0 if RANK in {-1, 0}: - val_loader = create_dataloader_ori(val_path, + val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, From a426c30326ca8c0654c9723d96dd3453cf6e1685 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:44:27 +0530 Subject: [PATCH 006/247] attempt cls format --- data/coco.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/data/coco.yaml b/data/coco.yaml index c07c27816796..35d3001404fc 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -15,18 +15,6 @@ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions. 
# Classes nc: 91 # number of classes -''' -names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', - 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', - 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', - 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', - 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush'] # class names -''' - # Download script/URL (optional) download: | From 19ec985e02a7d95cd19861b22599e3c1bc69e50b Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 21:57:41 +0530 Subject: [PATCH 007/247] revert --- data/coco.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index 35d3001404fc..0c0c4adab05d 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -14,7 +14,17 @@ val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 # Classes -nc: 91 # number of classes +nc: 80 # number of classes +names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] # class names + # Download script/URL (optional) download: | From 7f552344a72f0da50efe77246ddb8c9743e6d6d2 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:03:28 +0530 Subject: [PATCH 008/247] attempt --- models/yolov5m_seg.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolov5m_seg.yaml b/models/yolov5m_seg.yaml index 6b19539786b2..37a0bb3f6050 100644 --- a/models/yolov5m_seg.yaml +++ b/models/yolov5m_seg.yaml @@ -45,4 +45,4 @@ head: [-1, 3, C3, [1024, False]], # 23 (P5/32-large) [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) - ] + ] \ No newline at end of file From ff5f80f0258322a4fa41df5928c8ff3610f3f9fb Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:22:33 +0530 Subject: [PATCH 009/247] print mlc --- train_instseg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/train_instseg.py b/train_instseg.py index ad7ced91e4c2..9be5c42a9ef7 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -258,6 +258,7 @@ def 
train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mask_downsample_ratio=mask_ratio ) mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class + print("mlc , nc ", mlc, " ", nc ) nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' From 0d9f53df03d0b7fc0b5432ced3b8ff0c9647d80b Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:35:23 +0530 Subject: [PATCH 010/247] add pdb --- utils/seg_loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index d4cf26401bc6..c0ccb7525c56 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -184,6 +184,7 @@ def loss_segment(self, preds, targets, masks): mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] + import pdb;pdb.set_trace() mxywhs = ( mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] From 0ee84c5cd1131af25ef2a9115cf997da40eb28e7 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 22:39:37 +0530 Subject: [PATCH 011/247] pdb --- utils/seg_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index c0ccb7525c56..02cb65711ff4 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -171,6 +171,7 @@ def loss_segment(self, preds, targets, masks): t = torch.full_like(ps[:, self.nm :], self.cn, device=device) # targets t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(ps[:, self.nm :], t) # BCE + import pdb;pdb.set_trace() # Mask Regression mask_gt = masks[tidxs[i]] @@ -184,7 +185,6 @@ def loss_segment(self, preds, targets, masks): mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - import pdb;pdb.set_trace() mxywhs = ( mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] From 63fce9a9e9da5ce7ac96a5edc7f2dde61233efc3 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 11 Jul 2022 23:12:46 +0530 Subject: [PATCH 012/247] revert --- utils/seg_loss.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index 02cb65711ff4..d4cf26401bc6 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -171,7 +171,6 @@ def loss_segment(self, preds, targets, masks): t = torch.full_like(ps[:, self.nm :], self.cn, device=device) # targets t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(ps[:, self.nm :], t) # BCE - import pdb;pdb.set_trace() # Mask Regression mask_gt = masks[tidxs[i]] From 023255fc8c5735202b6a0809f9f2d4f019d970f6 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 12 Jul 2022 11:53:31 +0530 Subject: [PATCH 013/247] allow plotting --- train_instseg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index 9be5c42a9ef7..b5a307097368 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -169,7 +169,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mask=True, verbose=False, mask_downsample_ratio=mask_ratio, - plots=False + plots=True ) g = [], [], [] # optimizer parameter groups bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() From d533e303d2bf039e870dcf68d77753e7a9b5b695 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 10:35:50 +0530 Subject: [PATCH 014/247] make compatible with train.py --- evaluator.py | 2 +- utils/metrics.py | 382 +++++++++++++---------------------- utils/seg_metrics.py | 465 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 602 insertions(+), 247 deletions(-) create mode 100644 utils/seg_metrics.py diff --git a/evaluator.py b/evaluator.py index e15d090ad625..3ed19bc529b0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -42,7 +42,7 @@ process_mask_upsample, scale_masks, ) -from utils.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix from utils.plots import output_to_target, plot_images_boxes_and_masks from utils.torch_utils import select_device, time_sync from PIL import Image diff --git a/utils/metrics.py b/utils/metrics.py index 8646931bed00..cfdfbdb88b2c 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -5,7 +5,6 @@ import math import warnings -from easydict import EasyDict as edict from pathlib import Path import matplotlib.pyplot as plt @@ -13,19 +12,22 @@ import torch -def fitness(x, masks=False): +def fitness(x): # Model fitness as a weighted combination of metrics - if masks: - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] return (x[:, :4] * w).sum(1) -def ap_per_class( - tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" -): - """Compute the average precision, given the recall and precision curves. +def smooth(y, f=0.05): + # Box filter of fraction f + nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) + p = np.ones(nf // 2) # ones padding + yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded + return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed + + +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): + """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments tp: True positives (nparray, nx1 or nx10). @@ -33,8 +35,7 @@ def ap_per_class( pred_cls: Predicted object classes (nparray). target_cls: True object classes (nparray). plot: Plot precision-recall curve at mAP@0.5 - save_dir: Plot save directory. - prefix: prefix. + save_dir: Plot save directory # Returns The average precision as computed in py-faster-rcnn. 
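+    Example (illustrative sketch; array shapes are assumptions, not from this patch):
+        # tp: (n_pred, 10) booleans at 10 IoU thresholds; conf, pred_cls: (n_pred,); target_cls: (n_gt,)
+        tp, fp, p, r, f1, ap, ap_class = ap_per_class(tp, conf, pred_cls, target_cls)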
""" @@ -44,7 +45,7 @@ def ap_per_class( tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] # Find unique classes - unique_classes = np.unique(target_cls) + unique_classes, nt = np.unique(target_cls, return_counts=True) nc = unique_classes.shape[0] # number of classes, number of detections # Create Precision-Recall curve and compute AP for each class @@ -52,114 +53,48 @@ def ap_per_class( ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c - n_l = (target_cls == c).sum() # number of labels + n_l = nt[ci] # number of labels n_p = i.sum() # number of predictions - if n_p == 0 or n_l == 0: continue - else: - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp( - -px, -conf[i], recall[:, 0], left=0 - ) # negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + eps) # recall curve + r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + 1e-16) - names = [ - v for k, v in names.items() if k in unique_classes - ] # list: only classes that have data - names = {i: v for i, v in enumerate(names)} # to dict - if plot and save_dir is not None: - plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) - plot_mc_curve( - px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" - ) - plot_mc_curve( - px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" - ) - plot_mc_curve( - px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" - ) - - i = f1.mean(0).argmax() # max F1 index - return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") - - -def ap_per_class_box_and_mask( - tp_m, - tp_b, - conf, - pred_cls, - target_cls, - plot=False, - save_dir=".", - names=(), -): - """ - Args: - tp_b: tp of boxes. - tp_m: tp of masks. - other arguments see `func: ap_per_class`. 
- """ - results_boxes = ap_per_class( - tp_b, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Box", - ) - results_masks = ap_per_class( - tp_m, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Mask", - ) - - results = edict( - { - "boxes": { - "p": results_boxes[0], - "r": results_boxes[1], - "ap": results_boxes[2], - "f1": results_boxes[3], - "ap_class": results_boxes[4], - }, - "masks": { - "p": results_masks[0], - "r": results_masks[1], - "ap": results_masks[2], - "f1": results_masks[3], - "ap_class": results_masks[4], - }, - } - ) - return results + f1 = 2 * p * r / (p + r + eps) + names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data + names = dict(enumerate(names)) # to dict + if plot: + plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) + plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') + plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') + plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') + + i = smooth(f1.mean(0), 0.1).argmax() # max F1 index + p, r, f1 = p[:, i], r[:, i], f1[:, i] + tp = (r * nt).round() # true positives + fp = (tp / (p + eps) - tp).round() # false positives + return tp, fp, p, r, f1, ap, unique_classes.astype(int) def compute_ap(recall, precision): - """Compute the average precision, given the recall and precision curves + """ Compute the average precision, given the recall and precision curves # Arguments recall: The recall curve (list) precision: The precision curve (list) @@ -175,8 +110,8 @@ def compute_ap(recall, precision): mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) # Integrate area under curve - method = "interp" # methods: 'continuous', 'interp' - if method == "interp": + method = 'interp' # methods: 'continuous', 'interp' + if method == 'interp': x = np.linspace(0, 1, 101) # 101-point interp (COCO) ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate else: # 'continuous' @@ -211,11 +146,7 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -225,7 +156,7 @@ def process_batch(self, detections, labels): matches = np.zeros((0, 3)) n = matches.shape[0] > 0 - m0, m1, _ = matches.transpose().astype(np.int16) + m0, m1, _ = matches.transpose().astype(int) for i, gc in enumerate(gt_classes): j = m0 == i if n and sum(j) == 1: @@ -241,101 +172,94 @@ def process_batch(self, detections, labels): def matrix(self): return self.matrix - def plot(self, normalize=True, save_dir="", names=()): + def tp_fp(self): + tp = self.matrix.diagonal() # true positives + fp = self.matrix.sum(1) - tp # false positives + # fn = self.matrix.sum(0) - tp # false negatives (missed detections) + return tp[:-1], fp[:-1] # remove background class + + def plot(self, normalize=True, save_dir='', names=()): try: import seaborn as sn - array = self.matrix / ( - (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 - ) # normalize columns + array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns array[array < 0.005] = np.nan # don't 
annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) - sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len( - names - ) == self.nc # apply names to ticklabels + nc, nn = self.nc, len(names) # number of classes, names + sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size + labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels with warnings.catch_warnings(): - warnings.simplefilter( - "ignore" - ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap( - array, - annot=self.nc < 30, - annot_kws={"size": 8}, - cmap="Blues", - fmt=".2f", - square=True, - xticklabels=names + ["background FP"] if labels else "auto", - yticklabels=names + ["background FN"] if labels else "auto", - ).set_facecolor((1, 1, 1)) - fig.axes[0].set_xlabel("True") - fig.axes[0].set_ylabel("Predicted") - fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) + warnings.simplefilter('ignore') # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap(array, + annot=nc < 30, + annot_kws={ + "size": 8}, + cmap='Blues', + fmt='.2f', + square=True, + vmin=0.0, + xticklabels=names + ['background FP'] if labels else "auto", + yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) + fig.axes[0].set_xlabel('True') + fig.axes[0].set_ylabel('Predicted') + fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) plt.close() except Exception as e: - print(f"WARNING: ConfusionMatrix plot failure: {e}") + print(f'WARNING: ConfusionMatrix plot failure: {e}') def print(self): for i in range(self.nc + 1): - print(" ".join(map(str, self.matrix[i]))) + print(' '.join(map(str, self.matrix[i]))) -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 - box2 = box2.T +def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4) # Get the coordinates of bounding boxes - if x1y1x2y2: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - else: # transform from xywh to xyxy - b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 - b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 - b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 - b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + if xywh: # transform from xywh to xyxy + (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, 1), box2.chunk(4, 1) + w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 + b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ + b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ + else: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, 1) + b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, 1) + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) - ).clamp(0) + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) # Union Area - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps union = w1 * h1 + w2 * h2 - inter + eps + # IoU iou = inter / union - if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min( - b1_x1, b2_x1 - ) # convex (smallest enclosing box) width + if CIoU or DIoU or GIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ( - (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 - + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 - ) / 4 # center distance squared - if DIoU: - return iou - rho2 / c2 # DIoU - elif ( - CIoU - ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow( - torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 - ) + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 + if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU - else: # GIoU https://arxiv.org/pdf/1902.09630.pdf - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU - else: - return iou # IoU + return iou - rho2 / c2 # DIoU + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf + return iou # IoU + + +def box_area(box): + # box = xyxy(4,n) + return (box[2] - box[0]) * (box[3] - box[1]) -def box_iou(box1, box2): +def box_iou(box1, box2, eps=1e-7): # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py """ Return intersection-over-union (Jaccard index) of boxes. 
@@ -348,44 +272,28 @@ def box_iou(box1, box2): IoU values for every element in boxes1 and boxes2 """ - def box_area(box): - # box = 4xn - return (box[2] - box[0]) * (box[3] - box[1]) - - area1 = box_area(box1.T) - area2 = box_area(box2.T) - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = ( - ( - torch.min(box1[:, None, 2:], box2[:, 2:]) - - torch.max(box1[:, None, :2], box2[:, :2]) - ) - .clamp(0) - .prod(2) - ) - return inter / ( - area1[:, None] + area2 - inter - ) # iou = inter / (area1 + area2 - inter) + (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) def bbox_ioa(box1, box2, eps=1e-7): - """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 box1: np.array of shape(4) box2: np.array of shape(nx4) returns: np.array of shape(n) """ - box2 = box2.transpose() - # Get the coordinates of bounding boxes - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + b1_x1, b1_y1, b1_x2, b1_y2 = box1 + b2_x1, b2_y1, b2_x2, b2_y2 = box2.T # Intersection area - inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( - np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) - ).clip(0) + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ + (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps @@ -394,72 +302,54 @@ def bbox_ioa(box1, box2, eps=1e-7): return inter_area / box2_area -def wh_iou(wh1, wh2): +def wh_iou(wh1, wh2, eps=1e-7): # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / ( - wh1.prod(2) + wh2.prod(2) - inter - ) # iou = inter / (area1 + area2 - inter) + return inter / (wh1.prod(2) + wh2.prod(2) - inter + eps) # iou = inter / (area1 + area2 - inter) # Plots ---------------------------------------------------------------------------------------------------------------- -def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): +def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()): # Precision-recall curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) py = np.stack(py, axis=1) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot( - px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" - ) # plot(recall, precision) + ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) else: - ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) - - ax.plot( - px, - py.mean(1), - linewidth=3, - color="blue", - label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), - ) - ax.set_xlabel("Recall") - ax.set_ylabel("Precision") + ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) + + ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.set_xlabel('Recall') + ax.set_ylabel('Precision') ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) + fig.savefig(save_dir, dpi=250) plt.close() -def plot_mc_curve( - px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" -): +def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) + ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) else: - ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) - - y = py.mean(0) - ax.plot( - px, - y, - linewidth=3, - color="blue", - label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", - ) + ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) + + y = smooth(py.mean(0), 0.05) + ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) - plt.close() + fig.savefig(save_dir, dpi=250) + plt.close() \ No newline at end of file diff --git a/utils/seg_metrics.py b/utils/seg_metrics.py new file mode 100644 index 000000000000..8646931bed00 --- /dev/null +++ b/utils/seg_metrics.py @@ -0,0 +1,465 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Model validation metrics +""" + +import math +import warnings +from easydict import EasyDict as edict +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import torch + + +def fitness(x, masks=False): + # Model fitness as a weighted combination of metrics + if masks: + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, 
mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def ap_per_class( + tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" +): + """Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (nparray, nx1 or nx10). + conf: Objectness value from 0-1 (nparray). + pred_cls: Predicted object classes (nparray). + target_cls: True object classes (nparray). + plot: Plot precision-recall curve at mAP@0.5 + save_dir: Plot save directory. + prefix: prefix. + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + nc = unique_classes.shape[0] # number of classes, number of detections + + # Create Precision-Recall curve and compute AP for each class + px, py = np.linspace(0, 1, 1000), [] # for plotting + ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = (target_cls == c).sum() # number of labels + n_p = i.sum() # number of predictions + + if n_p == 0 or n_l == 0: + continue + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + 1e-16) # recall curve + r[ci] = np.interp( + -px, -conf[i], recall[:, 0], left=0 + ) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Compute F1 (harmonic mean of precision and recall) + f1 = 2 * p * r / (p + r + 1e-16) + names = [ + v for k, v in names.items() if k in unique_classes + ] # list: only classes that have data + names = {i: v for i, v in enumerate(names)} # to dict + if plot and save_dir is not None: + plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) + plot_mc_curve( + px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" + ) + plot_mc_curve( + px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" + ) + plot_mc_curve( + px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" + ) + + i = f1.mean(0).argmax() # max F1 index + return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") + + +def ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. 
+ """ + results_boxes = ap_per_class( + tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box", + ) + results_masks = ap_per_class( + tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask", + ) + + results = edict( + { + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[2], + "f1": results_boxes[3], + "ap_class": results_boxes[4], + }, + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[2], + "f1": results_masks[3], + "ap_class": results_masks[4], + }, + } + ) + return results + + +def compute_ap(recall, precision): + """Compute the average precision, given the recall and precision curves + # Arguments + recall: The recall curve (list) + precision: The precision curve (list) + # Returns + Average precision, precision curve, recall curve + """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = "interp" # methods: 'continuous', 'interp' + if method == "interp": + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre, mrec + + +class ConfusionMatrix: + # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix + def __init__(self, nc, conf=0.25, iou_thres=0.45): + self.matrix = np.zeros((nc + 1, nc + 1)) + self.nc = nc # number of classes + self.conf = conf + self.iou_thres = iou_thres + + def process_batch(self, detections, labels): + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
+ Arguments: + detections (Array[N, 6]), x1, y1, x2, y2, conf, class + labels (Array[M, 5]), class, x1, y1, x2, y2 + Returns: + None, updates confusion matrix accordingly + """ + detections = detections[detections[:, 4] > self.conf] + gt_classes = labels[:, 0].int() + detection_classes = detections[:, 5].int() + iou = box_iou(labels[:, 1:], detections[:, :4]) + + x = torch.where(iou > self.iou_thres) + if x[0].shape[0]: + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + else: + matches = np.zeros((0, 3)) + + n = matches.shape[0] > 0 + m0, m1, _ = matches.transpose().astype(np.int16) + for i, gc in enumerate(gt_classes): + j = m0 == i + if n and sum(j) == 1: + self.matrix[detection_classes[m1[j]], gc] += 1 # correct + else: + self.matrix[self.nc, gc] += 1 # background FP + + if n: + for i, dc in enumerate(detection_classes): + if not any(m1 == i): + self.matrix[dc, self.nc] += 1 # background FN + + def matrix(self): + return self.matrix + + def plot(self, normalize=True, save_dir="", names=()): + try: + import seaborn as sn + + array = self.matrix / ( + (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 + ) # normalize columns + array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) + + fig = plt.figure(figsize=(12, 9), tight_layout=True) + sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size + labels = (0 < len(names) < 99) and len( + names + ) == self.nc # apply names to ticklabels + with warnings.catch_warnings(): + warnings.simplefilter( + "ignore" + ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap( + array, + annot=self.nc < 30, + annot_kws={"size": 8}, + cmap="Blues", + fmt=".2f", + square=True, + xticklabels=names + ["background FP"] if labels else "auto", + yticklabels=names + ["background FN"] if labels else "auto", + ).set_facecolor((1, 1, 1)) + fig.axes[0].set_xlabel("True") + fig.axes[0].set_ylabel("Predicted") + fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) + plt.close() + except Exception as e: + print(f"WARNING: ConfusionMatrix plot failure: {e}") + + def print(self): + for i in range(self.nc + 1): + print(" ".join(map(str, self.matrix[i]))) + + +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 + box2 = box2.T + + # Get the coordinates of bounding boxes + if x1y1x2y2: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: # transform from xywh to xyxy + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + # Intersection area + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + ).clamp(0) + + # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + iou = inter / union + if GIoU or DIoU or CIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1 + ) # convex (smallest enclosing box) width + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ( + (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 + ) / 4 # center distance squared + if DIoU: + return iou - rho2 / c2 # DIoU + elif ( + CIoU + ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 + ) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) # CIoU + else: # GIoU https://arxiv.org/pdf/1902.09630.pdf + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU + else: + return iou # IoU + + +def box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + def box_area(box): + # box = 4xn + return (box[2] - box[0]) * (box[3] - box[1]) + + area1 = box_area(box1.T) + area2 = box_area(box2.T) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = ( + ( + torch.min(box1[:, None, 2:], box2[:, 2:]) + - torch.max(box1[:, None, :2], box2[:, :2]) + ) + .clamp(0) + .prod(2) + ) + return inter / ( + area1[:, None] + area2 - inter + ) # iou = inter / (area1 + area2 - inter) + + +def bbox_ioa(box1, box2, eps=1e-7): + """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + box1: np.array of shape(4) + box2: np.array of shape(nx4) + returns: np.array of shape(n) + """ + + box2 = box2.transpose() + + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + + # Intersection area + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( + np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) + ).clip(0) + + # box2 area + box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps + + # Intersection over box2 area + return inter_area / box2_area + + +def wh_iou(wh1, wh2): + # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 + wh1 = wh1[:, None] # [N,1,2] + wh2 = wh2[None] # [1,M,2] + inter = torch.min(wh1, wh2).prod(2) # [N,M] + return inter / ( + wh1.prod(2) + wh2.prod(2) - inter + ) # iou = inter / (area1 + area2 - inter) + + +# Plots ---------------------------------------------------------------------------------------------------------------- + + +def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): + # Precision-recall curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + py = np.stack(py, axis=1) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py.T): + ax.plot( + px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" + ) # plot(recall, precision) + else: + ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) + + ax.plot( + px, + py.mean(1), + linewidth=3, + color="blue", + label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), + ) + ax.set_xlabel("Recall") + ax.set_ylabel("Precision") + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + fig.savefig(Path(save_dir), dpi=250) + plt.close() + + +def plot_mc_curve( + px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" +): + # Metric-confidence curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py): + ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) + else: + ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) + + y = py.mean(0) + ax.plot( + px, + y, + linewidth=3, + color="blue", + label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", + ) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + fig.savefig(Path(save_dir), dpi=250) + plt.close() From 6a706e26e526d2014ae2f585c817c1cef64146ba Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 10:37:04 +0530 Subject: [PATCH 015/247] use seg_metrics --- utils/segment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment.py b/utils/segment.py index 89f6627a6259..7a32ce518033 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -5,7 +5,7 @@ import torch import torchvision from .general import xyxy2xywh, xywh2xyxy -from .metrics import box_iou +from .seg_metrics import box_iou def segment2box(segment, width=640, height=640): # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) 
to (xyxy) From 4c59f284566c6d8a07697570205ce64873ea78cd Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 11:18:46 +0530 Subject: [PATCH 016/247] add TODOs --- evaluator.py | 2 ++ seg_augmentations.py | 4 ++- seg_dataloaders.py | 71 ++++---------------------------------------- utils/seg_loss.py | 5 ++-- 4 files changed, 13 insertions(+), 69 deletions(-) diff --git a/evaluator.py b/evaluator.py index 3ed19bc529b0..096befddeb5c 100644 --- a/evaluator.py +++ b/evaluator.py @@ -1,3 +1,5 @@ +# TODO: Optimize plotting, losses & merge with val.py + # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ Validate a trained YOLOv5 model accuracy on a custom dataset diff --git a/seg_augmentations.py b/seg_augmentations.py index 63055f640390..eddf1e31da22 100644 --- a/seg_augmentations.py +++ b/seg_augmentations.py @@ -1,3 +1,5 @@ +# TODO: Move to utils, merge with augmentations.py + # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ Image augmentation functions @@ -12,7 +14,7 @@ from utils.general import colorstr, check_version from utils.segment import segment2box, resample_segments -from utils.metrics import bbox_ioa +from utils.seg_metrics import bbox_ioa class Albumentations: diff --git a/seg_dataloaders.py b/seg_dataloaders.py index 31fb0a1872ba..7a3266e0e6f9 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -1,3 +1,5 @@ +## TODO: Move to utils, merge with dataloaders.py + # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ Dataloaders @@ -55,6 +57,7 @@ def __init__(self, sampler): def __iter__(self): while True: yield from iter(self.sampler) + class YoloBatchSampler(torchBatchSampler): """ This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. @@ -70,71 +73,6 @@ def __iter__(self): for batch in super().__iter__(): yield [(self.augment, idx) for idx in batch] -def create_dataloader_ori( - path, - imgsz, - batch_size, - stride, - single_cls=False, - hyp=None, - augment=False, - cache=False, - pad=0.0, - rect=False, - rank=-1, - workers=8, - image_weights=False, - quad=False, - prefix="", - shuffle=False, - neg_dir="", - bg_dir="", - area_thr=0.2, - mask_head=False, - mask_downsample_ratio=1, -): - if rect and shuffle: - print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") - shuffle = False - # Make sure only the first process in DDP process the dataset first, and the following others can use the cache - data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels - with torch_distributed_zero_first(rank): - dataset = data_load( - path, - imgsz, - batch_size, - augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, - single_cls=single_cls, - stride=int(stride), - pad=pad, - image_weights=image_weights, - prefix=prefix, - neg_dir=neg_dir, - bg_dir=bg_dir, - area_thr=area_thr, - ) - if mask_head: - dataset.downsample_ratio = mask_downsample_ratio - - batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers - sampler = distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else None - loader = DataLoader if image_weights else InfiniteDataLoader - # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader() - dataloader = loader( - dataset, - batch_size=batch_size, - num_workers=nw, - shuffle=shuffle and sampler is None, - sampler=sampler, - pin_memory=True, - 
collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, - ) - return dataloader, dataset - def create_dataloader( path, @@ -1196,6 +1134,7 @@ def hub_ops(f, max_dim=1920): return stats +# REFACTOR IN NEW FILE import os import glob import shutil @@ -1554,7 +1493,7 @@ def __iter__(self): yield next(self.iterator) -# NEW FILE +# REFACTOR IN A NEW FILE from PIL import Image, ImageDraw import numpy as np from PIL import ImageFile diff --git a/utils/seg_loss.py b/utils/seg_loss.py index d4cf26401bc6..d8d155739273 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -1,6 +1,8 @@ +# TODO: merge with loss.py.. Optimize speed + import torch from utils.torch_utils import de_parallel, is_parallel -from utils.general import xywh2xyxy +from utils.general import xywh2xyxy, Profile from utils.segment import mask_iou, masks_iou, crop import torch.nn.functional as F import torch.nn as nn @@ -134,7 +136,6 @@ def loss_segment(self, preds, targets, masks): tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks( p, targets ) # targets - # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx From 4aef933c072db5e33a68999a0992125da302f717 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 14:15:03 +0530 Subject: [PATCH 017/247] increase line length --- evaluator.py | 414 +++++++---------------------- seg_augmentations.py | 147 +++------- seg_dataloaders.py | 621 ++++++++++--------------------------------- utils/seg_loss.py | 170 ++++-------- utils/seg_metrics.py | 180 +++---------- utils/segment.py | 103 +++---- 6 files changed, 375 insertions(+), 1260 deletions(-) diff --git a/evaluator.py b/evaluator.py index 096befddeb5c..636c73482c98 100644 --- a/evaluator.py +++ b/evaluator.py @@ -15,47 +15,26 @@ import numpy as np import torch import torch.nn.functional as F -#import pycocotools.mask as mask_util +from PIL import Image +# import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader -from utils.general import ( - coco80_to_coco91_class, - increment_path, - colorstr, -) -from utils.general import ( - check_dataset, - check_img_size, - check_suffix, -) -from utils.general import ( - box_iou, - non_max_suppression, - scale_coords, - xyxy2xywh, - xywh2xyxy, -) -from utils.segment import ( - non_max_suppression_masks, - mask_iou, - process_mask, - process_mask_upsample, - scale_masks, -) -from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) +from utils.general import (check_dataset, check_img_size, check_suffix, ) +from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) from utils.plots import output_to_target, plot_images_boxes_and_masks +from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) from utils.torch_utils import select_device, time_sync -from PIL import Image + def save_one_txt(predn, save_conf, shape, file): # Save one txt result gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = ( - (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() - ) # normalized xywh + xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / 
gn).view(-1).tolist()) # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(file, "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") @@ -69,20 +48,13 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): if pred_masks is not None: pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [ - mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in pred_masks - ] + rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { - "image_id": image_id, - "category_id": class_map[int(p[5])], - "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), - } + pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), } if pred_masks is not None: pred_dict["segmentation"] = rles[i] jdict.append(pred_dict) @@ -90,25 +62,9 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): @torch.no_grad() class Yolov5Evaluator: - def __init__( - self, - data, - conf_thres=0.001, - iou_thres=0.6, - device="", - single_cls=False, - augment=False, - verbose=False, - project="runs/val", - name="exp", - exist_ok=False, - half=True, - save_dir=Path(""), - nosave=False, - plots=True, - mask=False, - mask_downsample_ratio=1, - ) -> None: + def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, + project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, + mask=False, mask_downsample_ratio=1, ) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -132,40 +88,14 @@ def __init__( self.confusion_matrix = ConfusionMatrix(nc=self.nc) self.dt = [0.0, 0.0, 0.0] self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = ( - ("%20s" + "%11s" * 10) - % ( - "Class", - "Images", - "Labels", - "Box:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - "Mask:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - ) - if self.mask - else ("%20s" + "%11s" * 6) - % ( - "Class", - "Images", - "Labels", - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - ) - ) + self.s = (("%20s" + "%11s" * 10) % ( + "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", + "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( + "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data[ - "val" - ].endswith( - "coco/val2017.txt" - ) # COCO dataset + self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( + "coco/val2017.txt") # COCO dataset self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.jdict = [] self.iou_thres = 0.65 if self.is_coco else self.iou_thres @@ -192,9 +122,7 @@ def run_training(self, model, dataloader, compute_loss=None): # inference # masks will be `None` if training objection. 
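# A rough sketch of the per-image flow in the loop below (names as in this
# patch; gt masks are presumably `None` when training plain box detection):
#     pred_maski = self.get_predmasks(pred, proto_out, gt_masksi.shape[1:])       # via process_mask()
#     correct_b = self.process_batch(predn, labelsn, self.iouv)                   # box matches
#     correct_m = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn)  # mask matches
# Boxes and masks are matched to labels independently, at the same ten IoU
# thresholds held in self.iouv.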
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
-            tqdm(dataloader, desc=self.s)
-        ):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -213,11 +141,8 @@ def run_training(self, model, dataloader, compute_loss=None):
                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(
-                    pred,
-                    proto_out,
-                    gt_masksi.shape[1:] if gt_masksi is not None else None,
-                )
+                pred_maski = self.get_predmasks(pred, proto_out,
+                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
@@ -234,29 +159,12 @@ def run_training(self, model, dataloader, compute_loss=None):
         # Return results
         model.float()  # for training
-        return (
-            (
-                *self.metric.mean_results(),
-                *(self.total_loss.cpu() / len(dataloader)).tolist(),
-            ),
-            self.metric.get_maps(self.nc),
-            t,
-        )
-
-    def run(
-        self,
-        weights,
-        batch_size,
-        imgsz,
-        save_txt=False,
-        save_conf=False,
-        save_json=False,
-        task="val",
-    ):
+        return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
+                self.metric.get_maps(self.nc), t,)
+
+    def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ):
         """This is for native evaluation."""
-        model, dataloader, imgsz = self.before_infer(
-            weights, batch_size, imgsz, save_txt, task
-        )
+        model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task)
         self.seen = 0
         # self.iouv.to(self.device)
         self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
@@ -265,9 +173,7 @@ def run(
         model.eval()

         # inference
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
-            tqdm(dataloader, desc=self.s)
-        ):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -289,11 +195,8 @@ def run(
                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(
-                    pred,
-                    proto_out,
-                    gt_masksi.shape[1:] if gt_masksi is not None else None,
-                )
+                pred_maski = self.get_predmasks(pred, proto_out,
+                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
@@ -310,37 +213,21 @@ def run(
                 # clone() so that plot_images works correctly
                 predn = pred.clone()
                 # test-time letterboxing adds 0.5 padding, which differs from the padding
                # used by the dataloader, so ratio_pad must be passed in here
-                scale_coords(
-                    img[si].shape[1:], predn[:, :4], shape, ratio_pad
-                )  # native-space pred
+                scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad)  # native-space pred
+
                 # Save/log
                 if save_txt and self.save_dir.exists():
                     # NOTE: convert coords to native space when saving txt.
                     # supports saving box predictions only
-                    save_one_txt(
-                        predn,
-                        save_conf,
-                        shape,
-                        file=self.save_dir / "labels" / (path.stem + ".txt"),
-                    )
+                    save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), )
                 if save_json and self.save_dir.exists():
                     # NOTE: convert coords to native space when saving json.
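# A minimal sketch of the RLE step save_one_json performs for each predicted
# mask (pycocotools is imported as mask_util, currently commented out at the
# top of this file):
#     rle = mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0]
#     rle["counts"] = rle["counts"].decode("utf-8")  # bytes -> str so json.dump works
# which makes the saved JSON scorable with the official COCO mask AP tooling.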
# if pred_maski is not None: # h, w, n - pred_maski = scale_masks( - img[si].shape[1:], - pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, - ratio_pad, - ) - save_one_json( - predn, - self.jdict, - path, - self.class_map, - pred_maski, - ) # append to COCO-JSON dictionary + pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), + shape, ratio_pad, ) + save_one_json(predn, self.jdict, path, self.class_map, + pred_maski, ) # append to COCO-JSON dictionary if self.plots and batch_i < 3: self.plot_images(batch_i, img, targets, masks, out, paths) @@ -357,42 +244,24 @@ def run( # Print speeds shape = (batch_size, 3, imgsz, imgsz) - print( - f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" - % t - ) + print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t) s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" - if save_txt and self.save_dir.exists() - else "" - ) - print( - f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}" - ) + f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "") + print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}") # Return results - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) + return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), + self.metric.get_maps(self.nc), t,) def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): "prepare for evaluation without training." 
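# Hypothetical end-to-end use of this evaluator (the yaml and weights names
# are illustrative, not part of this patch):
#     evaluator = Yolov5Evaluator(data="data/coco128.yaml", mask=True)
#     metrics, maps, times = evaluator.run(weights="weights/seg.pt", batch_size=32, imgsz=640)
# run() prepares the model and dataloader here in before_infer(), then
# mirrors run_training() but additionally handles txt/JSON saving, plotting
# and speed reporting.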
self.device = select_device(self.device, batch_size=batch_size) # Directories - self.save_dir = increment_path( - Path(self.project) / self.name, exist_ok=self.exist_ok - ) # increment run + self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( - parents=True, exist_ok=True - ) # make dir + (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model check_suffix(weights, ".pt") @@ -402,27 +271,11 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Data if self.device.type != "cpu": - model( - torch.zeros(1, 3, imgsz, imgsz) - .to(self.device) - .type_as(next(model.parameters())) - ) # run once + model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once pad = 0.0 if task == "speed" else 0.5 - task = ( - task if task in ("train", "val", "test") else "val" - ) # path to train/val/test images - dataloader = create_dataloader( - self.data[task], - imgsz, - batch_size, - gs, - self.single_cls, - pad=pad, - rect=True, - prefix=colorstr(f"{task}: "), - mask_head=self.mask, - mask_downsample_ratio=self.mask_downsample_ratio, - )[0] + task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images + dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, + prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] return model, dataloader, imgsz def inference(self, model, img, targets, masks=None, compute_loss=None): @@ -435,29 +288,18 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): self.dt[0] += t2 - t1 # Run model - out, train_out = model( - img, augment=self.augment - ) # inference and training outputs + out, train_out = model(img, augment=self.augment) # inference and training outputs self.dt[1] += time_sync() - t2 # Compute loss if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[ - 1 - ] # box, obj, cls + self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( - self.device - ) # to pixels + targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels t3 = time_sync() - out = self.nms( - prediction=out, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - multi_label=True, - agnostic=self.single_cls, - ) + out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, + agnostic=self.single_cls, ) self.dt[2] += time_sync() - t3 return out, train_out @@ -468,25 +310,18 @@ def after_infer(self): """ # Plot confusion matrix if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot( - save_dir=self.save_dir, names=list(self.names.values()) - ) + self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy box_or_mask_any = stats[0].any() or stats[1].any() stats = stats[1:] if not self.mask else stats if len(stats) and box_or_mask_any: - results = self.ap_per_class( - *stats, - self.plots, - self.save_dir if self.save_dir.exists() else None, - self.names, - ) + results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, + 
self.names, ) self.metric.update(results) - nt = np.bincount( - stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc - ) # number of targets per class + nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), + minlength=self.nc) # number of targets per class else: nt = torch.zeros(1) @@ -506,19 +341,13 @@ def process_batch(self, detections, labels, iouv): Returns: correct (Array[N, 10]), for 10 IoU levels """ - correct = torch.zeros( - detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device - ) + correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) iou = box_iou(labels[:, 1:], detections[:, :4]) x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) - ) # IoU above threshold and classes match + (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -546,53 +375,29 @@ def get_predmasks(self, pred, proto_out, gt_shape): if proto_out is None or len(pred) == 0: return None process = process_mask_upsample if self.plots else process_mask - gt_shape = ( - gt_shape[0] * self.mask_downsample_ratio, - gt_shape[1] * self.mask_downsample_ratio, - ) + gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) # n, h, w - pred_mask = ( - process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) - .permute(2, 0, 1) - .contiguous() - ) + pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) return pred_mask def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ( - (pred_maski is None) ^ (gt_masksi is None) - ), "`proto_out` and `gt_masksi` should be both None or both exist." + assert not ((pred_maski is None) ^ ( + gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." 
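# mask_iou (utils/segment.py) receives masks flattened to (N, H*W). For
# binary float masks it reduces to this sketch (an assumption about the
# helper, whose body is not shown in this patch):
#     inter = gt_flat @ pred_flat.T                        # (N_gt, N_pred)
#     union = gt_flat.sum(1)[:, None] + pred_flat.sum(1) - inter
#     iou = inter / (union + 1e-7)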
if pred_maski is None and gt_masksi is None: return torch.zeros(0, self.niou, dtype=torch.bool) - correct = torch.zeros( - predn.shape[0], - self.iouv.shape[0], - dtype=torch.bool, - device=self.iouv.device, - ) + correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) if not self.plots: - gt_masksi = F.interpolate( - gt_masksi.unsqueeze(0), - pred_maski.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - - iou = mask_iou( - gt_masksi.view(gt_masksi.shape[0], -1), - pred_maski.view(pred_maski.shape[0], -1), - ) + gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", + align_corners=False, ).squeeze(0) + + iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), ) x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) - ) # IoU above threshold and classes match + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -609,15 +414,9 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): if len(predn) == 0: if nl: - self.stats.append( - ( - torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), - torch.Tensor(), - tcls, - ) - ) + self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), torch.Tensor(), tcls,)) return # Predictions @@ -632,24 +431,15 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): correct_boxes = self.process_batch(predn, labelsn, self.iouv) # masks - correct_masks = self.process_batch_masks( - predn, pred_maski, gt_maski, labelsn - ) + correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn) if self.plots: self.confusion_matrix.process_batch(predn, labelsn) else: correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append( - ( - correct_masks.cpu(), - correct_boxes.cpu(), - predn[:, 4].cpu(), - predn[:, 5].cpu(), - tcls, - ) - ) # (correct, conf, pcls, tcls) + self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(), + tcls,)) # (correct, conf, pcls, tcls) def print_metric(self, nt, stats): # Print results @@ -660,9 +450,7 @@ def print_metric(self, nt, stats): # TODO: self.seen support verbose. 
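# The verbose branch below emits one row per class with the same columns as
# the summary row: class name, images seen, label count, then the box (and,
# with mask=True, mask) P, R, mAP@.5 and mAP@.5:.95 values returned by
# self.metric.class_result(i).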
if self.verbose and self.nc > 1 and len(stats): for i, c in enumerate(self.metric.ap_class_index): - print( - pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) - ) + print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) def plot_images(self, i, img, targets, masks, out, paths): if not self.save_dir.exists(): @@ -670,47 +458,27 @@ def plot_images(self, i, img, targets, masks, out, paths): # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - Thread( - target=plot_images_boxes_and_masks, - args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, - ).start() + Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), + daemon=True, ).start() f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions # plot predition if len(self.pred_masks): - pred_masks = ( - torch.cat(self.pred_masks, dim=0) - if len(self.pred_masks) > 1 - else self.pred_masks[0] - ) + pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) else: pred_masks = None - Thread( - target=plot_images_boxes_and_masks, - args=( - img, - output_to_target(out), - pred_masks, - paths, - f, - self.names, - max(img.shape[2:]), - ), - daemon=True, - ).start() + Thread(target=plot_images_boxes_and_masks, + args=(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:]),), + daemon=True, ).start() import wandb if wandb.run: - res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:])) + res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, + max(img.shape[2:])) res = Image.fromarray(res) - wandb.log({f"pred_{i}":wandb.Image(res)}) + wandb.log({f"pred_{i}": wandb.Image(res)}) def nms(self, **kwargs): - return ( - non_max_suppression_masks(**kwargs) - if self.mask - else non_max_suppression(**kwargs) - ) + return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) def ap_per_class(self, *args): return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) diff --git a/seg_augmentations.py b/seg_augmentations.py index eddf1e31da22..409e021772b3 100644 --- a/seg_augmentations.py +++ b/seg_augmentations.py @@ -13,8 +13,8 @@ import numpy as np from utils.general import colorstr, check_version -from utils.segment import segment2box, resample_segments from utils.seg_metrics import bbox_ioa +from utils.segment import segment2box, resample_segments class Albumentations: @@ -26,23 +26,11 @@ def __init__(self): check_version(A.__version__, "1.0.3") # version requirement - self.transform = A.Compose( - [ - A.Blur(p=0.01), - A.MedianBlur(p=0.01), - A.ToGray(p=0.01), - A.CLAHE(p=0.01), - A.RandomBrightnessContrast(p=0.0), - A.RandomGamma(p=0.0), - A.ImageCompression(quality_lower=75, p=0.0), - ], - bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), - ) - - logging.info( - colorstr("albumentations: ") - + ", ".join(f"{x}" for x in self.transform.transforms if x.p) - ) + self.transform = A.Compose([A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), A.ImageCompression(quality_lower=75, p=0.0), ], + bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), ) + + logging.info(colorstr("albumentations: ") + ", ".join(f"{x}" for x in self.transform.transforms if x.p)) except ImportError: # 
package not installed, skip pass except Exception as e: @@ -50,12 +38,8 @@ def __init__(self): def __call__(self, im, labels, p=1.0): if self.transform and random.random() < p: - new = self.transform( - image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0] - ) # transformed - im, labels = new["image"], np.array( - [[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])] - ) + new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed + im, labels = new["image"], np.array([[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])]) return im, labels @@ -71,9 +55,7 @@ def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - im_hsv = cv2.merge( - (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)) - ) + im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed @@ -85,9 +67,7 @@ def hist_equalize(im, clahe=True, bgr=False): yuv[:, :, 0] = c.apply(yuv[:, :, 0]) else: yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram - return cv2.cvtColor( - yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB - ) # convert YUV image to RGB + return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB def replicate(im, labels): @@ -99,9 +79,7 @@ def replicate(im, labels): for i in s.argsort()[: round(s.size * 0.5)]: # smallest indices x1b, y1b, x2b, y2b = boxes[i] bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int( - random.uniform(0, w - bw) - ) # offset x, y + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) @@ -109,15 +87,8 @@ def replicate(im, labels): return im, labels -def letterbox( - im, - new_shape=(640, 640), - color=(114, 114, 114), - auto=True, - scaleFill=False, - scaleup=True, - stride=32, - center=True, # center padding or left top padding +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32, + center=True, # center padding or left top padding ): # Resize and pad image while meeting stride-multiple constraints shape = im.shape[:2] # current shape [height, width] @@ -148,25 +119,12 @@ def letterbox( im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)) if center else 0, int(round(dh + 0.1)) left, right = int(round(dw - 0.1)) if center else 0, int(round(dw + 0.1)) - im = cv2.copyMakeBorder( - im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color - ) # add border + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border return im, ratio, (dw, dh) -def random_perspective( - im, - targets=(), - segments=(), - degrees=10, - translate=0.1, - scale=0.1, - shear=10, - perspective=0.0, - border=(0, 0), - area_thr=0.2, - return_seg=False, -): +def random_perspective(im, targets=(), segments=(), degrees=10, translate=0.1, scale=0.1, shear=10, perspective=0.0, + border=(0, 0), area_thr=0.2, return_seg=False, ): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] @@ -198,24 +156,16 @@ def 
random_perspective( # Translation T = np.eye(3) - T[0, 2] = ( - random.uniform(0.5 - translate, 0.5 + translate) * width - ) # x translation (pixels) - T[1, 2] = ( - random.uniform(0.5 - translate, 0.5 + translate) * height - ) # y translation (pixels) + T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) + T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed if perspective: - im = cv2.warpPerspective( - im, M, dsize=(width, height), borderValue=(114, 114, 114) - ) + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) else: # affine - im = cv2.warpAffine( - im, M[:2], dsize=(width, height), borderValue=(114, 114, 114) - ) + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt @@ -235,9 +185,7 @@ def random_perspective( xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform - xy = ( - xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] - ) # perspective rescale or affine + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) @@ -245,38 +193,26 @@ def random_perspective( else: # warp boxes xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape( - n * 4, 2 - ) # x1y1, x2y2, x1y2, x2y1 + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape( - n, 8 - ) # perspective rescale or affine + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] - new = ( - np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - ) + new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) # clip new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates - i = box_candidates( - box1=targets[:, 1:5].T * s, - box2=new.T, - cls=targets[:, 0], + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, cls=targets[:, 0], # area_thr=0.01 if use_segments else 0.10, - area_thr=area_thr, - ) + area_thr=area_thr, ) targets = targets[i] targets[:, 1:5] = new[i] - new_segments = ( - np.array(new_segments)[i] if len(new_segments) else np.array(new_segments) - ) + new_segments = (np.array(new_segments)[i] if len(new_segments) else np.array(new_segments)) return (im, targets, new_segments) if return_seg else (im, targets) @@ -294,13 +230,7 @@ def copy_paste(im, labels, segments, p=0.5): if (ioa < 0.30).all(): # allow 30% obscuration of existing labels labels = np.concatenate((labels, [[l[0], *box]]), 0) segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) - cv2.drawContours( - im_new, - [segments[j].astype(np.int32)], - -1, - (255, 255, 255), - cv2.FILLED, - ) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED, ) result = cv2.bitwise_and(src1=im, src2=im_new) result = cv2.flip(result, 1) # augment segments (flip left-right) @@ -315,9 +245,7 @@ def cutout(im, labels, p=0.5): # Applies image cutout augmentation 
https://arxiv.org/abs/1708.04552 if random.random() < p: h, w = im.shape[:2] - scales = ( - [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 - ) # image size fraction + scales = ([0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16) # image size fraction for s in scales: mask_h = random.randint(1, int(h * s)) # create random masks mask_w = random.randint(1, int(w * s)) @@ -348,23 +276,12 @@ def mixup(im, labels, im2, labels2): return im, labels -def box_candidates( - box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16 -): # box1(4,n), box2(4,n) +def box_candidates(box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] - area_thr = ( - np.array(area_thr)[cls.astype(np.int)] - if isinstance(area_thr, list) - else area_thr - ) + area_thr = (np.array(area_thr)[cls.astype(np.int)] if isinstance(area_thr, list) else area_thr) if isinstance(area_thr, list) and len(area_thr) == 1: area_thr = area_thr[0] ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio - return ( - (w2 > wh_thr) - & (h2 > wh_thr) - & (w2 * h2 / (w1 * h1 + eps) > area_thr) - & (ar < ar_thr) - ) # candidates \ No newline at end of file + return ((w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)) # candidates diff --git a/seg_dataloaders.py b/seg_dataloaders.py index 7a3266e0e6f9..32f3e0af7127 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -5,45 +5,29 @@ Dataloaders """ -import glob +import json import logging -import os import time -import json -import yaml -import random +from functools import wraps from itertools import repeat from multiprocessing.pool import ThreadPool, Pool -from PIL import Image from pathlib import Path -from functools import wraps from zipfile import ZipFile -import cv2 -import numpy as np -import torch import torch.nn.functional as F -from torch.utils.data import distributed +import yaml from torch.utils.data import Dataset as torchDataset +from torch.utils.data import distributed +from torch.utils.data.sampler import BatchSampler as torchBatchSampler from torch.utils.data.sampler import RandomSampler +from torch.utils.data.sampler import Sampler from tqdm import tqdm - -from seg_augmentations import ( - Albumentations, - augment_hsv, - copy_paste, - letterbox, - mixup, - random_perspective, -) +from seg_augmentations import (Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, ) from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy from utils.torch_utils import torch_distributed_zero_first -from torch.utils.data.sampler import BatchSampler as torchBatchSampler -from torch.utils.data.sampler import Sampler - class _RepeatSampler: """ Sampler that repeats forever @@ -58,6 +42,7 @@ def __iter__(self): while True: yield from iter(self.sampler) + class YoloBatchSampler(torchBatchSampler): """ This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. 
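A minimal, self-contained sketch of the same (augment, index) batching idea (illustrative only, not part of this patch; the class and variable names below are hypothetical):

    # Wrap another sampler so each mini-batch yields (flag, index) tuples,
    # letting Dataset.__getitem__ decide per batch whether to build a mosaic.
    from torch.utils.data.sampler import BatchSampler, SequentialSampler

    class FlaggedBatchSampler(BatchSampler):
        def __init__(self, *args, flag=True, **kwargs):
            super().__init__(*args, **kwargs)
            self.flag = flag

        def __iter__(self):
            for batch in super().__iter__():
                yield [(self.flag, idx) for idx in batch]

    # usage: every index in a batch carries the same augment flag
    sampler = FlaggedBatchSampler(SequentialSampler(range(8)), batch_size=4, drop_last=False, flag=True)
    print(next(iter(sampler)))  # [(True, 0), (True, 1), (True, 2), (True, 3)]
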
@@ -74,82 +59,33 @@ def __iter__(self): yield [(self.augment, idx) for idx in batch] -def create_dataloader( - path, - imgsz, - batch_size, - stride, - single_cls=False, - hyp=None, - augment=False, - cache=False, - pad=0.0, - rect=False, - rank=-1, - workers=8, - image_weights=False, - quad=False, - prefix="", - shuffle=False, - neg_dir="", - bg_dir="", - area_thr=0.2, - mask_head=False, - mask_downsample_ratio=1, -): +def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, + rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, neg_dir="", + bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, ): if rect and shuffle: print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") shuffle = False data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): - dataset = data_load( - path, - imgsz, - batch_size, - augment=augment, # augment images + dataset = data_load(path, imgsz, batch_size, augment=augment, # augment images hyp=hyp, # augmentation hyperparameters rect=rect, # rectangular training - cache_images=cache, - single_cls=single_cls, - stride=int(stride), - pad=pad, - image_weights=image_weights, - prefix=prefix, - neg_dir=neg_dir, - bg_dir=bg_dir, - area_thr=area_thr, - ) + cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, + prefix=prefix, neg_dir=neg_dir, bg_dir=bg_dir, area_thr=area_thr, ) if mask_head: dataset.downsample_ratio = mask_downsample_ratio batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers # sampler = InfiniteSampler(len(dataset), seed=0) - sampler = ( - distributed.DistributedSampler(dataset, shuffle=shuffle) - if rank != -1 - else RandomSampler(dataset) - ) + sampler = (distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else RandomSampler(dataset)) - batch_sampler = ( - YoloBatchSampler( - sampler=sampler, - batch_size=batch_size, - drop_last=False, - augment=augment, - ) - if not rect - else None - ) - dataloader = DataLoader( - dataset, - num_workers=nw, - batch_size=1 - if batch_sampler is not None - else batch_size, # batch-size and batch-sampler is exclusion - batch_sampler=batch_sampler, - pin_memory=True, + batch_sampler = (YoloBatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False, + augment=augment, ) if not rect else None) + dataloader = DataLoader(dataset, num_workers=nw, batch_size=1 if batch_sampler is not None else batch_size, + # batch-size and batch-sampler is exclusion + batch_sampler=batch_sampler, pin_memory=True, collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, # Make sure each process has different random seed, especially for 'fork' method. # Check https://github.com/pytorch/pytorch/issues/63311 for more details. 
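For reference, a hedged usage sketch of this new entry point (illustrative: the dataset path and hyp dict are placeholders, and it assumes the function returns the loader itself, since the actual return statement falls outside this hunk):

    train_loader = create_dataloader(
        "data/coco128-seg/images/train",   # hypothetical dataset path
        imgsz=640,
        batch_size=16,
        stride=32,
        hyp=hyp,                           # augmentation hyperparameters (dict)
        augment=True,
        rank=-1,                           # single-process, non-DDP
        shuffle=True,
        mask_head=True,                    # selects LoadImagesAndLabelsAndMasks
        mask_downsample_ratio=4,           # masks returned at 1/4 resolution
    )
    for batch in train_loader:
        ...                                # batches additionally carry instance masks
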
@@ -203,32 +139,14 @@ class LoadImagesAndLabels(Dataset): # YOLOv5 train_loader/val_loader, loads images and labels for training and validation cache_version = 0.6 # dataset labels *.cache version - def __init__( - self, - path, - img_size=640, - batch_size=16, - augment=False, - hyp=None, - rect=False, - image_weights=False, - cache_images=False, - single_cls=False, - stride=32, - pad=0.0, - prefix="", - neg_dir="", - bg_dir="", - area_thr=0.2, - ): + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, ): super().__init__(augment=augment) self.img_size = img_size self.hyp = hyp self.image_weights = image_weights self.rect = False if image_weights else rect - self.mosaic = ( - self.augment and not self.rect - ) # load 4 images at a time into a mosaic (only during training) + self.mosaic = (self.augment and not self.rect) # load 4 images at a time into a mosaic (only during training) self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path @@ -278,15 +196,11 @@ def cache_images(self, cache_images, prefix): """Cache images to disk or ram for faster speed.""" if cache_images == "disk": self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + "_npy") - self.img_npy = [ - self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files - ] + self.img_npy = [self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files] self.im_cache_dir.mkdir(parents=True, exist_ok=True) gb = 0 # Gigabytes of cached images self.img_hw0, self.img_hw = [None] * self.num_imgs, [None] * self.num_imgs - results = ThreadPool(NUM_THREADS).imap( - lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs)) - ) + results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs))) pbar = tqdm(enumerate(results), total=self.num_imgs) for i, x in pbar: if cache_images == "disk": @@ -294,11 +208,7 @@ def cache_images(self, cache_images, prefix): np.save(self.img_npy[i].as_posix(), x[0]) gb += self.img_npy[i].stat().st_size else: - ( - self.imgs[i], - self.img_hw0[i], - self.img_hw[i], - ) = x # im, hw_orig, hw_resized = load_image(self, i) + (self.imgs[i], self.img_hw0[i], self.img_hw[i],) = x # im, hw_orig, hw_resized = load_image(self, i) gb += self.imgs[i].nbytes pbar.desc = f"{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})" pbar.close() @@ -308,21 +218,16 @@ def get_img_files(self, p, prefix): try: f = [] # image files if p.is_dir(): # dir - f += glob.glob(str(p / "**" / "*.*"), recursive=True) - # f = list(p.rglob('*.*')) # pathlib + f += glob.glob(str(p / "**" / "*.*"), recursive=True) # f = list(p.rglob('*.*')) # pathlib elif p.is_file(): # file with open(p, "r") as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep - f += [ - x.replace("./", parent) if x.startswith("./") else x for x in t - ] # local to global path - # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) + f += [x.replace("./", parent) if x.startswith("./") else x for x in + t] # local to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: raise Exception(f"{prefix}{p} does not exist") - img_files = sorted( - [x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS] - ) + img_files = sorted([x.replace("/", os.sep) for x in f if 
x.split(".")[-1].lower() in IMG_FORMATS]) # img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib assert img_files, f"{prefix}No images found" except Exception as e: @@ -334,26 +239,19 @@ def get_neg_and_bg(self, neg_dir, bg_dir): img_neg_files, img_bg_files = [], [] if os.path.isdir(neg_dir): img_neg_files = [os.path.join(neg_dir, i) for i in os.listdir(neg_dir)] - logging.info( - colorstr("Negative dir: ") - + f"'{neg_dir}', using {len(img_neg_files)} pictures from the dir as negative samples during training" - ) + logging.info(colorstr( + "Negative dir: ") + f"'{neg_dir}', using {len(img_neg_files)} pictures from the dir as negative samples during training") if os.path.isdir(bg_dir): img_bg_files = [os.path.join(bg_dir, i) for i in os.listdir(bg_dir)] - logging.info( - colorstr("Background dir: ") - + f"{bg_dir}, using {len(img_bg_files)} pictures from the dir as background during training" - ) + logging.info(colorstr( + "Background dir: ") + f"{bg_dir}, using {len(img_bg_files)} pictures from the dir as background during training") return img_neg_files, img_bg_files def load_cache(self, cache_path, prefix): """Load labels from *.cache file.""" try: - cache, exists = ( - np.load(cache_path, allow_pickle=True).item(), - True, - ) # load dict + cache, exists = (np.load(cache_path, allow_pickle=True).item(), True,) # load dict assert cache["version"] == self.cache_version # same version assert cache["hash"] == get_hash(self.label_files + self.img_files) # same hash except: @@ -367,8 +265,7 @@ def load_cache(self, cache_path, prefix): if cache["msgs"]: logging.info("\n".join(cache["msgs"])) # display warnings assert ( - nf > 0 or not self.augment - ), f"{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}" + nf > 0 or not self.augment), f"{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}" # Read cache [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items @@ -400,32 +297,18 @@ def update_rect(self, num_batches, pad): elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = ( - np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride - ) + self.batch_shapes = (np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride) def cache_labels(self, path=Path("./labels.cache"), prefix=""): """Cache labels to *.cache file if there is no *.cache file in local.""" # Cache dataset labels, check images and read shapes x = {} # dict - nm, nf, ne, nc, msgs = ( - 0, - 0, - 0, - 0, - [], - ) # number missing, found, empty, corrupt, messages + nm, nf, ne, nc, msgs = (0, 0, 0, 0, [],) # number missing, found, empty, corrupt, messages desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." 
with Pool(NUM_THREADS) as pool: - pbar = tqdm( - pool.imap( - verify_image_label, - zip(self.img_files, self.label_files, repeat(prefix)), - ), - desc=desc, - total=len(self.img_files), - ) + pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix)), ), + desc=desc, total=len(self.img_files), ) for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: nm += nm_f nf += nf_f @@ -451,9 +334,7 @@ def cache_labels(self, path=Path("./labels.cache"), prefix=""): path.with_suffix(".cache.npy").rename(path) # remove .npy suffix logging.info(f"{prefix}New cache created: {path}") except Exception as e: - logging.info( - f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}" - ) # path not writeable + logging.info(f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}") # path not writeable return x def __len__(self): @@ -487,33 +368,21 @@ def __getitem__(self, index): # Letterbox shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size - ) # final letterboxed shape + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] - ) + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: - img, labels = random_perspective( - img, - labels, - degrees=hyp["degrees"], - translate=hyp["translate"], - scale=hyp["scale"], - shear=hyp["shear"], - perspective=hyp["perspective"], - ) + img, labels = random_perspective(img, labels, degrees=hyp["degrees"], translate=hyp["translate"], + scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], ) nl = len(labels) # number of labels if nl: - labels[:, 1:5] = xyxy2xywhn( - labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 - ) + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) if self.augment: # Albumentations @@ -535,8 +404,7 @@ def __getitem__(self, index): if nl: labels[:, 1] = 1 - labels[:, 1] - # Cutouts - # labels = cutout(img, labels, p=0.5) + # Cutouts # labels = cutout(img, labels, p=0.5) labels_out = torch.zeros((nl, 6)) if nl: @@ -567,33 +435,13 @@ def collate_fn4(batch): for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW i *= 4 if random.random() < 0.5: - im = F.interpolate( - img[i].unsqueeze(0).float(), - scale_factor=2.0, - mode="bilinear", - align_corners=False, - )[0].type(img[i].type()) + im = \ + F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode="bilinear", align_corners=False, )[ + 0].type(img[i].type()) l = label[i] else: - im = torch.cat( - ( - torch.cat((img[i], img[i + 1]), 1), - torch.cat((img[i + 2], img[i + 3]), 1), - ), - 2, - ) - l = ( - torch.cat( - ( - label[i], - label[i + 1] + ho, - label[i + 2] + wo, - label[i + 3] + ho + wo, - ), - 0, - ) - * s - ) + im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1),), 2, ) + l = (torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo,), 0, ) * s) img4.append(im) label4.append(l) @@ -604,42 +452,12 @@ def collate_fn4(batch): class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for 
training/testing - def __init__( - self, - path, - img_size=640, - batch_size=16, - augment=False, - hyp=None, - rect=False, - image_weights=False, - cache_images=False, - single_cls=False, - stride=32, - pad=0, - prefix="", - neg_dir="", - bg_dir="", - area_thr=0.2, - downsample_ratio=1, # return dowmsample mask + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, + downsample_ratio=1, # return dowmsample mask ): - super().__init__( - path, - img_size, - batch_size, - augment, - hyp, - rect, - image_weights, - cache_images, - single_cls, - stride, - pad, - prefix, - neg_dir, - bg_dir, - area_thr, - ) + super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, + stride, pad, prefix, neg_dir, bg_dir, area_thr, ) self.downsample_ratio = downsample_ratio @Dataset.mosaic_getitem @@ -666,8 +484,7 @@ def __getitem__(self, index): # Letterbox shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size - ) # final letterboxed shape + self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling @@ -677,51 +494,25 @@ def __getitem__(self, index): # TODO if len(segments): for i_s in range(len(segments)): - segments[i_s] = xyn2xy( - segments[i_s], - ratio[0] * w, - ratio[1] * h, - padw=pad[0], - padh=pad[1], - ) + segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1] - ) + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: - img, labels, segments = random_perspective( - img, - labels, - segments=segments, - degrees=hyp["degrees"], - translate=hyp["translate"], - scale=hyp["scale"], - shear=hyp["shear"], - perspective=hyp["perspective"], - return_seg=True, - ) + img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], + translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], + return_seg=True, ) nl = len(labels) # number of labels if nl: - labels[:, 1:5] = xyxy2xywhn( - labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3 - ) + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) for si in range(len(segments)): - mask = polygon2mask_downsample( - img.shape[:2], - [segments[si].reshape(-1)], - downsample_ratio=self.downsample_ratio, - ) + mask = polygon2mask_downsample(img.shape[:2], [segments[si].reshape(-1)], + downsample_ratio=self.downsample_ratio, ) masks.append(torch.from_numpy(mask.astype(np.float32))) - masks = ( - torch.stack(masks, axis=0) - if len(masks) - else torch.zeros( - nl, img.shape[0] // self.downsample_ratio, img.shape[1] // self.downsample_ratio - ) - ) + masks = (torch.stack(masks, axis=0) if len(masks) else torch.zeros(nl, img.shape[0] // self.downsample_ratio, + img.shape[1] // self.downsample_ratio)) # TODO: albumentations support if self.augment: # Albumentations @@ -747,8 +538,7 @@ def __getitem__(self, index): labels[:, 1] = 1 - labels[:, 1] masks = 
torch.flip(masks, dims=[2]) - # Cutouts - # labels = cutout(img, labels, p=0.5) + # Cutouts # labels = cutout(img, labels, p=0.5) labels_out = torch.zeros((nl, 6)) if nl: @@ -786,18 +576,11 @@ def load_image(self, i): h0, w0 = im.shape[:2] # orig hw r = self.img_size / max(h0, w0) # ratio if r != 1: # if sizes are not equal - im = cv2.resize( - im, - (int(w0 * r), int(h0 * r)), - interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, - ) + im = cv2.resize(im, (int(w0 * r), int(h0 * r)), + interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, ) return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized else: - return ( - self.imgs[i], - self.img_hw0[i], - self.img_hw[i], - ) # im, hw_original, hw_resized + return (self.imgs[i], self.img_hw0[i], self.img_hw[i],) # im, hw_original, hw_resized def load_neg_image(self, index): @@ -815,9 +598,7 @@ def load_neg_image(self, index): def load_bg_image(self, index): path = self.img_files[index] bg_path = self.img_bg_files[np.random.randint(0, len(self.img_bg_files))] - img, coord, _, (w, h) = paste1( - path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5) - ) + img, coord, _, (w, h) = paste1(path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5)) label = self.labels[index] label[:, 1] = (label[:, 1] * w + coord[0]) / img.shape[1] label[:, 2] = (label[:, 2] * h + coord[1]) / img.shape[0] @@ -858,22 +639,10 @@ def load_mosaic(self, index, return_seg=False): img, _, (h, w) = load_neg_image(self, index) # place img in img4 if j == 0: - img4 = np.full( - (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8 - ) # base image with 4 tiles + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles if i == 0: # top left - x1a, y1a, x2a, y2a = ( - max(xc - w, 0), - max(yc - h, 0), - xc, - yc, - ) # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = ( - w - (x2a - x1a), - h - (y2a - y1a), - w, - h, - ) # xmin, ymin, xmax, ymax (small image) + x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,) # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,) # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -899,9 +668,7 @@ def load_mosaic(self, index, return_seg=False): labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], w, h, padw, padh - ) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) @@ -914,19 +681,9 @@ def load_mosaic(self, index, return_seg=False): # Augment img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) - results = random_perspective( - img4, - labels4, - segments4, - degrees=self.hyp["degrees"], - translate=self.hyp["translate"], - scale=self.hyp["scale"], - shear=self.hyp["shear"], - perspective=self.hyp["perspective"], - border=self.mosaic_border, - area_thr=self.area_thr, - return_seg=return_seg, - ) # border to remove + results = random_perspective(img4, labels4, segments4, degrees=self.hyp["degrees"], translate=self.hyp["translate"], + scale=self.hyp["scale"], shear=self.hyp["shear"], 
perspective=self.hyp["perspective"], + border=self.mosaic_border, area_thr=self.area_thr, return_seg=return_seg, ) # border to remove # return (img4, labels4, segments4) if return_seg else (img4, labels4) return results @@ -943,9 +700,7 @@ def load_mosaic9(self, index): # place img in img9 if i == 0: # center - img9 = np.full( - (s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8 - ) # base image with 4 tiles + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles h0, w0 = h, w c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates elif i == 1: # top @@ -971,20 +726,18 @@ def load_mosaic9(self, index): # Labels labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy( - labels[:, 1:], w, h, padx, pady - ) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padx, pady) for x in segments] labels9.append(labels) segments9.extend(segments) # Image - img9[y1:y2, x1:x2] = img[y1 - pady :, x1 - padx :] # img9[ymin:ymax, xmin:xmax] + img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] hp, wp = h, w # height, width previous # Offset yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y - img9 = img9[yc : yc + 2 * s, xc : xc + 2 * s] + img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s] # Concat/clip labels labels9 = np.concatenate(labels9, 0) @@ -998,17 +751,9 @@ def load_mosaic9(self, index): # img9, labels9 = replicate(img9, labels9) # replicate # Augment - img9, labels9 = random_perspective( - img9, - labels9, - segments9, - degrees=self.hyp["degrees"], - translate=self.hyp["translate"], - scale=self.hyp["scale"], - shear=self.hyp["shear"], - perspective=self.hyp["perspective"], - border=self.mosaic_border, - ) # border to remove + img9, labels9 = random_perspective(img9, labels9, segments9, degrees=self.hyp["degrees"], + translate=self.hyp["translate"], scale=self.hyp["scale"], shear=self.hyp["shear"], + perspective=self.hyp["perspective"], border=self.mosaic_border, ) # border to remove return img9, labels9 @@ -1034,11 +779,7 @@ def unzip(path): assert Path(path).is_file(), f"Error unzipping {path}, file not found" ZipFile(path).extractall(path=path.parent) # unzip dir = path.with_suffix("") # dataset directory == zip name - return ( - True, - str(dir), - next(dir.rglob("*.yaml")), - ) # zipped, data_dir, yaml_path + return (True, str(dir), next(dir.rglob("*.yaml")),) # zipped, data_dir, yaml_path else: # path is data.yaml return False, None, path @@ -1057,11 +798,7 @@ def hub_ops(f, max_dim=1920): im_height, im_width = im.shape[:2] r = max_dim / max(im_height, im_width) # ratio if r < 1.0: # image too large - im = cv2.resize( - im, - (int(im_width * r), int(im_height * r)), - interpolation=cv2.INTER_LINEAR, - ) + im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_LINEAR, ) cv2.imwrite(str(f_new), im) zipped, data_dir, yaml_path = unzip(Path(path)) @@ -1081,27 +818,17 @@ def hub_ops(f, max_dim=1920): for label in tqdm(dataset.labels, total=dataset.num_imgs, desc="Statistics"): x.append(np.bincount(label[:, 0].astype(int), minlength=data["nc"])) x = np.array(x) # shape(128x80) - stats[split] = { - "instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, - "image_stats": { - "total": dataset.num_imgs, - "unlabelled": int(np.all(x == 0, 1).sum()), - "per_class": (x > 
0).sum(0).tolist(), - }, - "labels": [ - {str(Path(k).name): round_labels(v.tolist())} - for k, v in zip(dataset.img_files, dataset.labels) - ], - } + stats[split] = {"instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, + "image_stats": {"total": dataset.num_imgs, "unlabelled": int(np.all(x == 0, 1).sum()), + "per_class": (x > 0).sum(0).tolist(), }, + "labels": [{str(Path(k).name): round_labels(v.tolist())} for k, v in + zip(dataset.img_files, dataset.labels)], } if hub: im_dir = hub_dir / "images" im_dir.mkdir(parents=True, exist_ok=True) - for _ in tqdm( - ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), - total=dataset.num_imgs, - desc="HUB Ops", - ): + for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.num_imgs, + desc="HUB Ops", ): pass # Profile @@ -1142,39 +869,16 @@ def hub_ops(f, max_dim=1920): import uuid import torch import cv2 -import numpy as np import random from pathlib import Path -from PIL import Image, ImageOps, ExifTags +from PIL import ImageOps, ExifTags from utils.segment import segments2boxes from utils.general import xywh2xyxy - # Parameters HELP_URL = "https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data" -IMG_FORMATS = [ - "bmp", - "jpg", - "jpeg", - "png", - "tif", - "tiff", - "dng", - "webp", - "mpo", -] # acceptable image suffixes -VID_FORMATS = [ - "mov", - "avi", - "mp4", - "mpg", - "mpeg", - "m4v", - "wmv", - "mkv", - "vdo", - "flv", -] # acceptable video suffixes +IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo", ] # acceptable image suffixes +VID_FORMATS = ["mov", "avi", "mp4", "mpg", "mpeg", "m4v", "wmv", "mkv", "vdo", "flv", ] # acceptable video suffixes NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads # Get orientation exif tag @@ -1182,6 +886,7 @@ def hub_ops(f, max_dim=1920): if ExifTags.TAGS[orientation] == "Orientation": break + def get_hash(paths): # Returns a single hash value of a list of paths (files or dirs) size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes @@ -1216,15 +921,8 @@ def exif_transpose(image): exif = image.getexif() orientation = exif.get(0x0112, 1) # default 1 if orientation > 1: - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90, - }.get(orientation) + method = {2: Image.FLIP_LEFT_RIGHT, 3: Image.ROTATE_180, 4: Image.FLIP_TOP_BOTTOM, 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, 7: Image.TRANSVERSE, 8: Image.ROTATE_90, }.get(orientation) if method is not None: image = image.transpose(method) del exif[0x0112] @@ -1239,10 +937,7 @@ def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): polygons (np.ndarray): [N, M], N is the number of polygons, M is the number of points(Be divided by 2). 
""" - img_size = ( - img_size[0] // downsample_ratio, - img_size[1] // downsample_ratio - ) + img_size = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) mask = np.zeros(img_size, dtype=np.uint8) polygons = np.asarray(polygons) / downsample_ratio polygons = polygons.astype(np.int32) @@ -1272,19 +967,14 @@ def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1): shape = polygons.shape polygons = polygons.reshape(shape[0], -1, 2) cv2.fillPoly(mask, polygons, color=color) - nh, nw = ( - img_size[0] // downsample_ratio, - img_size[1] // downsample_ratio - ) + nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) mask = cv2.resize(mask, (nw, nh)) return mask + def img2label_paths(img_paths): # Define label paths as a function of image paths - sa, sb = ( - os.sep + "images" + os.sep, - os.sep + "labels" + os.sep, - ) # /images/, /labels/ substrings + sa, sb = (os.sep + "images" + os.sep, os.sep + "labels" + os.sep,) # /images/, /labels/ substrings return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths] @@ -1302,14 +992,11 @@ def flatten_recursive(path="../datasets/coco128"): for file in tqdm(glob.glob(str(Path(path)) + "/**/*.*", recursive=True)): shutil.copyfile(file, new_path / Path(file).name) -def extract_boxes( - path="../datasets/coco128", -): # from utils.datasets import *; extract_boxes() + +def extract_boxes(path="../datasets/coco128", ): # from utils.datasets import *; extract_boxes() # Convert detection dataset into classification dataset, with one directory per class path = Path(path) # images dir - shutil.rmtree(path / "classifier") if ( - path / "classifier" - ).is_dir() else None # remove existing + shutil.rmtree(path / "classifier") if (path / "classifier").is_dir() else None # remove existing files = list(path.rglob("*.*")) n = len(files) # number of files for im_file in tqdm(files, total=n): @@ -1322,18 +1009,11 @@ def extract_boxes( lb_file = Path(img2label_paths([str(im_file)])[0]) if Path(lb_file).exists(): with open(lb_file, "r") as f: - lb = np.array( - [x.split() for x in f.read().strip().splitlines()], - dtype=np.float32, - ) # labels + lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32, ) # labels for j, x in enumerate(lb): c = int(x[0]) # class - f = ( - (path / "classifier") - / f"{c}" - / f"{path.stem}_{im_file.stem}_{j}.jpg" - ) # new filename + f = ((path / "classifier") / f"{c}" / f"{path.stem}_{im_file.stem}_{j}.jpg") # new filename if not f.parent.is_dir(): f.parent.mkdir(parents=True) @@ -1344,14 +1024,10 @@ def extract_boxes( b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite( - str(f), im[b[1] : b[3], b[0] : b[2]] - ), f"box failure in {f}" + assert cv2.imwrite(str(f), im[b[1]: b[3], b[0]: b[2]]), f"box failure in {f}" -def autosplit( - path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False -): +def autosplit(path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False): """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files Usage: from utils.datasets import *; autosplit() Arguments @@ -1360,47 +1036,25 @@ def autosplit( annotated_only: Only use images with an annotated txt file """ path = Path(path) # images dir - files = sorted( - [x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS] - ) # image files only + files = sorted([x for x in path.rglob("*.*") if x.suffix[1:].lower() in 
IMG_FORMATS]) # image files only n = len(files) # number of files random.seed(0) # for reproducibility - indices = random.choices( - [0, 1, 2], weights=weights, k=n - ) # assign each image to a split - - txt = [ - "autosplit_train.txt", - "autosplit_val.txt", - "autosplit_test.txt", - ] # 3 txt files + indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split + + txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt", ] # 3 txt files [(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing - print( - f"Autosplitting images from {path}" - + ", using *.txt labeled images only" * annotated_only - ) + print(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only) for i, img in tqdm(zip(indices, files), total=n): - if ( - not annotated_only or Path(img2label_paths([str(img)])[0]).exists() - ): # check label + if (not annotated_only or Path(img2label_paths([str(img)])[0]).exists()): # check label with open(path.parent / txt[i], "a") as f: - f.write( - "./" + img.relative_to(path.parent).as_posix() + "\n" - ) # add image to txt file + f.write("./" + img.relative_to(path.parent).as_posix() + "\n") # add image to txt file def verify_image_label(args): # Verify one image-label pair im_file, lb_file, prefix = args - nm, nf, ne, nc, msg, segments = ( - 0, - 0, - 0, - 0, - "", - [], - ) # number (missing, found, empty, corrupt), message, segments + nm, nf, ne, nc, msg, segments = (0, 0, 0, 0, "", [],) # number (missing, found, empty, corrupt), message, segments try: # verify images im = Image.open(im_file) @@ -1412,9 +1066,7 @@ def verify_image_label(args): with open(im_file, "rb") as f: f.seek(-2, 2) if f.read() != b"\xff\xd9": # corrupt JPEG - ImageOps.exif_transpose(Image.open(im_file)).save( - im_file, "JPEG", subsampling=0, quality=100 - ) + ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100) msg = f"{prefix}WARNING: {im_file}: corrupt JPEG restored and saved" # verify labels @@ -1424,22 +1076,14 @@ def verify_image_label(args): l = [x.split() for x in f.read().strip().splitlines() if len(x)] if any([len(x) > 6 for x in l]): # is segment classes = np.array([x[0] for x in l], dtype=np.float32) - segments = [ - np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l - ] # (cls, xy1...) - l = np.concatenate( - (classes.reshape(-1, 1), segments2boxes(segments)), 1 - ) # (cls, xywh) + segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) + l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) l = np.array(l, dtype=np.float32) nl = len(l) if nl: - assert ( - l.shape[1] == 5 - ), f"labels require 5 columns, {l.shape[1]} columns detected" + assert (l.shape[1] == 5), f"labels require 5 columns, {l.shape[1]} columns detected" assert (l >= 0).all(), f"negative label values {l[l < 0]}" - assert ( - l[:, 1:] <= 1 - ).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" + assert (l[:, 1:] <= 1).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" l, idx = np.unique(l, axis=0, return_index=True) # remove duplicate rows # NOTE: `np.unique` will change the order of `l`, so adjust the segments order too. 
segments = [segments[i] for i in idx] if len(segments) > 0 else segments @@ -1456,9 +1100,11 @@ def verify_image_label(args): nc = 1 msg = f"{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}" return [None, None, None, None, nm, nf, ne, nc, msg] - + + from torch.utils.data import DataLoader as torchDataLoader + class DataLoader(torchDataLoader): """ Lightnet dataloader that enables on the fly resizing of the images. @@ -1494,9 +1140,10 @@ def __iter__(self): # REFACTOR IN A NEW FILE -from PIL import Image, ImageDraw +from PIL import Image import numpy as np from PIL import ImageFile + # import numbers ImageFile.LOAD_TRUNCATED_IMAGES = True @@ -1515,6 +1162,7 @@ def get_raito(new_size, original_size): # # yolov5 way return min(new_size[0] / original_size[0], new_size[1] / original_size[1]) + def imresize(img, new_size): """Resize the img with new_size by PIL(keep aspect). @@ -1529,6 +1177,7 @@ def imresize(img, new_size): img = img.resize((int(old_size[0] * ratio), int(old_size[1] * ratio))) return img + def get_wh(a, b): return np.random.randint(a, b) @@ -1554,9 +1203,7 @@ def paste2(sample1, sample2, background, scale=1.2): background.paste(sample2, (x2, y2)) # background = background.resize((416, 416)) - return np.array(background), (x1, y1, x2, y2), background - # print(background.size) - # background.show() + return np.array(background), (x1, y1, x2, y2), background # print(background.size) # background.show() def paste1(sample, background, bg_size, fg_scale=1.5): diff --git a/utils/seg_loss.py b/utils/seg_loss.py index d8d155739273..8ffb4439c2f0 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -1,12 +1,13 @@ # TODO: merge with loss.py.. Optimize speed import torch -from utils.torch_utils import de_parallel, is_parallel -from utils.general import xywh2xyxy, Profile -from utils.segment import mask_iou, masks_iou, crop -import torch.nn.functional as F import torch.nn as nn +import torch.nn.functional as F + +from utils.general import xywh2xyxy from utils.loss import smooth_BCE, FocalLoss +from utils.segment import masks_iou, crop +from utils.torch_utils import is_parallel class ComputeLoss: @@ -23,9 +24,7 @@ def __init__(self, model, autobalance=False): self.mask_loss = MaskIOULoss() # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = smooth_BCE( - eps=h.get("label_smoothing", 0.0) - ) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets # Focal loss g = h["fl_gamma"] # focal loss gamma @@ -35,13 +34,7 @@ def __init__(self, model, autobalance=False): det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = ( - BCEcls, - BCEobj, - 1.0, - h, - autobalance, - ) + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = (BCEcls, BCEobj, 1.0, h, autobalance,) for k in "na", "nc", "nl", "anchors", "nm": if hasattr(det, k): setattr(self, k, getattr(det, k)) @@ -54,10 +47,7 @@ def __call__(self, p, targets, masks=None): # predictions, targets, model def loss_detection(self, p, targets): device = targets.device lcls, lbox, lobj = ( - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - ) + torch.zeros(1, device=device), torch.zeros(1, device=device), 
torch.zeros(1, device=device),) tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets # Losses @@ -73,22 +63,14 @@ def loss_detection(self, p, targets): pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou( - pbox.T, tbox[i], x1y1x2y2=False, CIoU=True - ) # iou(prediction, target) + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness score_iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = ( - b[sort_id], - a[sort_id], - gj[sort_id], - gi[sort_id], - score_iou[sort_id], - ) + b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio # Classification @@ -97,9 +79,7 @@ def loss_detection(self, p, targets): t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE - # Append targets to text file - # with open('targets.txt', 'a') as file: - # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] + # Append targets to text file # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss @@ -128,14 +108,9 @@ def loss_segment(self, preds, targets, masks): device = targets.device lcls, lbox, lobj, lseg = ( - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - ) - tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks( - p, targets - ) # targets + torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device), + torch.zeros(1, device=device),) + tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx @@ -149,47 +124,32 @@ def loss_segment(self, preds, targets, masks): pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou( - pbox.T, tbox[i], x1y1x2y2=False, CIoU=True - ) # iou(prediction, target) + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness score_iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = ( - b[sort_id], - a[sort_id], - gj[sort_id], - gi[sort_id], - score_iou[sort_id], - ) + b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, self.nm :], self.cn, device=device) # targets + t = torch.full_like(ps[:, self.nm:], self.cn, device=device) # targets t[range(n), tcls[i]] = self.cp - lcls += self.BCEcls(ps[:, self.nm :], t) # BCE + lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression mask_gt = masks[tidxs[i]] - downsampled_masks = F.interpolate( - mask_gt[None, :], - (mask_h, mask_w), - mode="bilinear", - align_corners=False, - ).squeeze(0) + 
downsampled_masks = F.interpolate(mask_gt[None, :], (mask_h, mask_w), mode="bilinear", + align_corners=False, ).squeeze(0) mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - mxywhs = ( - mxywh - / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] - * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device) - ) + mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * torch.tensor( + [mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) batch_lseg = torch.zeros(1, device=device) @@ -200,7 +160,7 @@ def loss_segment(self, preds, targets, masks): mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] - psi = ps[index][:, 5 : self.nm] + psi = ps[index][:, 5: self.nm] proto = proto_out[bi] batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) @@ -241,25 +201,13 @@ def build_targets(self, p, targets): tcls, tbox, indices, anch = [], [], [], [] gain = torch.ones(7, device=targets.device) # normalized to gridspace gain ai = ( - torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) - ) # same as .repeat_interleave(nt) + torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices g = 0.5 # bias - off = ( - torch.tensor( - [ - [0, 0], - [1, 0], - [0, 1], - [-1, 0], - [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], - device=targets.device, - ).float() - * g - ) # offsets + off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], device=targets.device, ).float() * g) # offsets for i in range(self.nl): anchors = self.anchors[i] @@ -295,9 +243,7 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices - indices.append( - (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)) - ) # image, anchor, grid indices + indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class @@ -310,31 +256,16 @@ def build_targets_for_masks(self, p, targets): tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] gain = torch.ones(8, device=targets.device) # normalized to gridspace gain ai = ( - torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) - ) # same as .repeat_interleave(nt) + torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) ti = ( - torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) - ) # same as .repeat_interleave(nt) + torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) - targets = torch.cat( - (targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2 - ) # append anchor indices + targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices g = 0.5 # bias - off = ( - torch.tensor( - [ - [0, 0], - [1, 0], - [0, 1], - [-1, 0], - [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], - device=targets.device, - ).float() - * g - ) # offsets + off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], device=targets.device, ).float() * g) # 
offsets for i in range(self.nl): anchors = self.anchors[i] @@ -371,9 +302,7 @@ def build_targets_for_masks(self, p, targets): # Append a = t[:, 6].long() # anchor indices tidx = t[:, 7].long() - indices.append( - (b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)) - ) # image, anchor, grid indices + indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class @@ -381,7 +310,7 @@ def build_targets_for_masks(self, p, targets): xywh.append(torch.cat((gxy, gwh), 1)) return tcls, tbox, indices, anch, tidxs, xywh - + class MaskIOULoss(nn.Module): def __init__(self) -> None: @@ -404,7 +333,9 @@ def forward(self, pred_mask, gt_mask, mxyxy=None): iou = masks_iou(pred_mask, gt_mask) return 1.0 - iou -import math + +import math + def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 @@ -422,8 +353,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= # Intersection area inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) - ).clamp(0) + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) # Union Area w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps @@ -432,24 +362,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= iou = inter / union if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min( - b1_x1, b2_x1 - ) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ( - (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 - + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 - ) / 4 # center distance squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( + b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared if DIoU: return iou - rho2 / c2 # DIoU - elif ( - CIoU - ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow( - torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 - ) + elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU @@ -457,4 +379,4 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= c_area = cw * ch + eps # convex area return iou - (c_area - union) / c_area # GIoU else: - return iou # IoU \ No newline at end of file + return iou # IoU diff --git a/utils/seg_metrics.py b/utils/seg_metrics.py index 8646931bed00..9c6133118dfa 100644 --- a/utils/seg_metrics.py +++ b/utils/seg_metrics.py @@ -5,12 +5,12 @@ import math import warnings -from easydict import EasyDict as edict from pathlib import Path import matplotlib.pyplot as plt import numpy as np import torch +from easydict import EasyDict as edict def fitness(x, masks=False): @@ -22,9 +22,7 @@ def fitness(x, masks=False): return (x[:, :4] * w).sum(1) -def ap_per_class( - tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix="" 
-): +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix=""): """Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments @@ -64,9 +62,7 @@ def ap_per_class( # Recall recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp( - -px, -conf[i], recall[:, 0], left=0 - ) # negative x, xp because xp decreases + r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve @@ -80,81 +76,35 @@ def ap_per_class( # Compute F1 (harmonic mean of precision and recall) f1 = 2 * p * r / (p + r + 1e-16) - names = [ - v for k, v in names.items() if k in unique_classes - ] # list: only classes that have data + names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = {i: v for i, v in enumerate(names)} # to dict if plot and save_dir is not None: plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) - plot_mc_curve( - px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1" - ) - plot_mc_curve( - px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision" - ) - plot_mc_curve( - px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall" - ) + plot_mc_curve(px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1") + plot_mc_curve(px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision") + plot_mc_curve(px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall") i = f1.mean(0).argmax() # max F1 index return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") -def ap_per_class_box_and_mask( - tp_m, - tp_b, - conf, - pred_cls, - target_cls, - plot=False, - save_dir=".", - names=(), -): +def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): """ Args: tp_b: tp of boxes. tp_m: tp of masks. other arguments see `func: ap_per_class`. 
""" - results_boxes = ap_per_class( - tp_b, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Box", - ) - results_masks = ap_per_class( - tp_m, - conf, - pred_cls, - target_cls, - plot=plot, - save_dir=save_dir, - names=names, - prefix="Mask", - ) - - results = edict( - { - "boxes": { - "p": results_boxes[0], - "r": results_boxes[1], - "ap": results_boxes[2], - "f1": results_boxes[3], - "ap_class": results_boxes[4], - }, - "masks": { - "p": results_masks[0], - "r": results_masks[1], - "ap": results_masks[2], - "f1": results_masks[3], - "ap_class": results_masks[4], - }, - } - ) + results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Box", ) + results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Mask", ) + + results = edict({ + "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], + "ap_class": results_boxes[4], }, + "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], + "ap_class": results_masks[4], }, }) return results @@ -211,11 +161,7 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) + matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -245,30 +191,17 @@ def plot(self, normalize=True, save_dir="", names=()): try: import seaborn as sn - array = self.matrix / ( - (self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1 - ) # normalize columns + array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1) # normalize columns array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len( - names - ) == self.nc # apply names to ticklabels + labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels with warnings.catch_warnings(): - warnings.simplefilter( - "ignore" - ) # suppress empty matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap( - array, - annot=self.nc < 30, - annot_kws={"size": 8}, - cmap="Blues", - fmt=".2f", - square=True, + warnings.simplefilter("ignore") # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap="Blues", fmt=".2f", square=True, xticklabels=names + ["background FP"] if labels else "auto", - yticklabels=names + ["background FN"] if labels else "auto", - ).set_facecolor((1, 1, 1)) + yticklabels=names + ["background FN"] if labels else "auto", ).set_facecolor((1, 1, 1)) fig.axes[0].set_xlabel("True") fig.axes[0].set_ylabel("Predicted") fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) @@ -297,8 +230,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= # Intersection area inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) - ).clamp(0) + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) # Union Area w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps @@ 
-307,24 +239,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= iou = inter / union if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min( - b1_x1, b2_x1 - ) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ( - (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 - + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2 - ) / 4 # center distance squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( + b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared if DIoU: return iou - rho2 / c2 # DIoU - elif ( - CIoU - ): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow( - torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 - ) + elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU @@ -356,17 +280,8 @@ def box_area(box): area2 = box_area(box2.T) # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = ( - ( - torch.min(box1[:, None, 2:], box2[:, 2:]) - - torch.max(box1[:, None, :2], box2[:, :2]) - ) - .clamp(0) - .prod(2) - ) - return inter / ( - area1[:, None] + area2 - inter - ) # iou = inter / (area1 + area2 - inter) + inter = ((torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)) + return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) def bbox_ioa(box1, box2, eps=1e-7): @@ -384,8 +299,7 @@ def bbox_ioa(box1, box2, eps=1e-7): # Intersection area inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( - np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1) - ).clip(0) + np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps @@ -399,9 +313,7 @@ def wh_iou(wh1, wh2): wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / ( - wh1.prod(2) + wh2.prod(2) - inter - ) # iou = inter / (area1 + area2 - inter) + return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) # Plots ---------------------------------------------------------------------------------------------------------------- @@ -414,19 +326,11 @@ def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot( - px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}" - ) # plot(recall, precision) + ax.plot(px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}") # plot(recall, precision) else: ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) - ax.plot( - px, - py.mean(1), - linewidth=3, - color="blue", - label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), - ) + ax.plot(px, py.mean(1), linewidth=3, color="blue", label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), ) ax.set_xlabel("Recall") ax.set_ylabel("Precision") ax.set_xlim(0, 1) @@ -436,9 +340,7 @@ def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): plt.close() 
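As a quick sanity check on the vectorized IoU-matrix helper above (box_iou in this file; illustrative, values computed by hand):

    import torch

    a = torch.tensor([[0., 0., 10., 10.],
                      [0., 0.,  5.,  5.]])
    b = torch.tensor([[0., 0., 10., 10.]])
    print(box_iou(a, b))
    # tensor([[1.0000],
    #         [0.2500]])  -> shape (N, M) = (2, 1)
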
-def plot_mc_curve( - px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric" -): +def plot_mc_curve(px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric"): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) @@ -449,13 +351,7 @@ def plot_mc_curve( ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) y = py.mean(0) - ax.plot( - px, - y, - linewidth=3, - color="blue", - label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", - ) + ax.plot(px, y, linewidth=3, color="blue", label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", ) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) diff --git a/utils/segment.py b/utils/segment.py index 7a32ce518033..01d2d1cafd9f 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -1,23 +1,21 @@ -import numpy as np import time + import cv2 -import torch.nn.functional as F +import numpy as np import torch +import torch.nn.functional as F import torchvision + from .general import xyxy2xywh, xywh2xyxy from .seg_metrics import box_iou + def segment2box(segment, width=640, height=640): # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) x, y = segment.T # segment xy inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) - x, y, = ( - x[inside], - y[inside], - ) - return ( - np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) - ) # xyxy + x, y, = (x[inside], y[inside],) + return (np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4))) # xyxy def segments2boxes(segments): @@ -34,24 +32,12 @@ def resample_segments(segments, n=1000): for i, s in enumerate(segments): x = np.linspace(0, len(s) - 1, n) xp = np.arange(len(s)) - segments[i] = ( - np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]) - .reshape(2, -1) - .T - ) # segment xy + segments[i] = (np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T) # segment xy return segments -def non_max_suppression_masks( - prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300, - mask_dim=32, -): + +def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, + multi_label=False, labels=(), max_det=300, mask_dim=32, ): """Runs Non-Maximum Suppression (NMS) on inference results Returns: @@ -62,12 +48,8 @@ def non_max_suppression_masks( xc = prediction[..., 4] > conf_thres # candidates # Checks - assert ( - 0 <= conf_thres <= 1 - ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert ( - 0 <= iou_thres <= 1 - ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" + assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" + assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" # Settings min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height @@ -79,9 +61,7 @@ def non_max_suppression_masks( nm = 5 + mask_dim t = time.time() - output = [ - torch.zeros((0, 6 + mask_dim), device=prediction.device) - ] * prediction.shape[0] + output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * prediction.shape[0] for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # 
width-height @@ -110,14 +90,10 @@ def non_max_suppression_masks( # Detections matrix nx6 (xyxy, conf, cls) if multi_label: i, j = (x[:, nm:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat( - (box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1 - ) + x = torch.cat((box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1) else: # best class only conf, j = x[:, nm:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float(), pred_masks), 1)[ - conf.view(-1) > conf_thres - ] + x = torch.cat((box, conf, j.float(), pred_masks), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: @@ -144,9 +120,7 @@ def non_max_suppression_masks( # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum( - 1, keepdim=True - ) # merged boxes + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes if redundant: i = i[iou.sum(1) > 1] # require redundancy @@ -157,6 +131,7 @@ def non_max_suppression_masks( return output + def crop(masks, boxes): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. @@ -168,21 +143,10 @@ def crop(masks, boxes): """ h, w, n = masks.size() x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = ( - boxes[:, 1], - boxes[:, 3], - ) - - rows = ( - torch.arange(w, device=masks.device, dtype=x1.dtype) - .view(1, -1, 1) - .expand(h, w, n) - ) - cols = ( - torch.arange(h, device=masks.device, dtype=x1.dtype) - .view(-1, 1, 1) - .expand(h, w, n) - ) + y1, y2 = (boxes[:, 1], boxes[:, 3],) + + rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) + cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) # (1, w, 1), (1, 1, n) -> (1, w, n) masks_left = rows >= x1.view(1, 1, -1) @@ -196,6 +160,7 @@ def crop(masks, boxes): return masks * crop_mask.float() + def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ Crop after unsample. 
@@ -207,8 +172,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): return: h, w, n """ # mask_h, mask_w, n - masks = proto_out.float().permute( - 1, 2, 0).contiguous() @ out_masks.float().tanh().T + masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T # print(masks.shape) masks = masks.sigmoid() # print('after sigmoid:', masks) @@ -217,7 +181,8 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0) # [mask_h, mask_w, n] masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) - return masks.gt_(0.5) # .gt_(0.2) + return masks.gt_(0.5) # .gt_(0.2) + def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): """ @@ -233,8 +198,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): mh, mw = proto_out.shape[1:] ih, iw = shape # mask_h, mask_w, n - masks = proto_out.float().permute( - 1, 2, 0).contiguous() @ out_masks.float().tanh().T + masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T # print(masks) masks = masks.sigmoid() # print('after sigmoid:', masks) @@ -249,6 +213,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0) return masks.gt_(0.5).permute(1, 2, 0).contiguous() + def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): """ img1_shape: model input shape, [h, w] @@ -258,16 +223,14 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): """ # Rescale coords (xyxy) from img1_shape to img0_shape if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], - img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, ( - img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] tl_pad = int(pad[1]), int(pad[0]) # y, x br_pad = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0]) - + if len(masks.shape) < 2: raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') # masks_h, masks_w, n @@ -286,6 +249,7 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): return masks + def mask_iou(mask1, mask2): """ mask1: [N, n] m1 means number of predicted objects @@ -303,6 +267,7 @@ def mask_iou(mask1, mask2): return intersection / (union + 1e-7) + def masks_iou(mask1, mask2): """ mask1: [N, n] m1 means number of predicted objects @@ -315,4 +280,4 @@ def masks_iou(mask1, mask2): area1 = torch.sum(mask1, dim=1).view(1, -1) area2 = torch.sum(mask2, dim=1).view(1, -1) union = (area1 + area2) - intersection - return intersection / (union + 1e-7) \ No newline at end of file + return intersection / (union + 1e-7) From 6c1adea65014e2bcc3060c43e7cbf42c885ec7aa Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 13 Jul 2022 14:22:58 +0530 Subject: [PATCH 018/247] fix test --- utils/loggers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 65c673c64498..24e98ab3304f 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -160,7 +160,7 @@ def on_model_save(self, last, epoch, 
final_epoch, best_fitness, fi): def on_train_end(self, last, best, plots, epoch, results, masks=False): # Callback runs on training end - plot_results = plot_results_with_masks if masks else plot_results + # plot_results = plot_results_with_masks if masks else plot_results if plots: plot_results(file=self.save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] From c50fd2286bf2447db55919d12784597ac59d83f3 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 13 Jul 2022 17:47:09 +0800 Subject: [PATCH 019/247] add limit=10 for plotting while training --- evaluator.py | 12 +++++++----- utils/plots.py | 5 +++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/evaluator.py b/evaluator.py index 636c73482c98..83b6afc18126 100644 --- a/evaluator.py +++ b/evaluator.py @@ -64,7 +64,7 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): class Yolov5Evaluator: def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - mask=False, mask_downsample_ratio=1, ) -> None: + max_plot_dets=10, mask=False, mask_downsample_ratio=1, ) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -79,6 +79,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.save_dir = save_dir self.nosave = nosave self.plots = plots + self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio @@ -146,7 +147,7 @@ def run_training(self, model, dataloader, compute_loss=None): # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) @@ -200,7 +201,7 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_ # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) @@ -468,11 +469,12 @@ def plot_images(self, i, img, targets, masks, out, paths): else: pred_masks = None Thread(target=plot_images_boxes_and_masks, - args=(img, output_to_target(out), pred_masks, paths, f, self.names, max(img.shape[2:]),), + args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), daemon=True, ).start() import wandb if wandb.run: - res = plot_images_boxes_and_masks(img, output_to_target(out), pred_masks, paths, f, self.names, + res = plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), + pred_masks, paths, f, self.names, max(img.shape[2:])) res = Image.fromarray(res) wandb.log({f"pred_{i}": wandb.Image(res)}) diff --git a/utils/plots.py b/utils/plots.py index 94e59fc8866c..f0c9b9ece4d4 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -723,10 +723,11 @@ def butter_lowpass(cutoff, fs, order): return filtfilt(b, a, data) # forward-backward filter -def output_to_target(output): +def output_to_target(output, filter_dets=10): # 
Convert model output to target format [batch_id, class_id, x, y, w, h, conf] targets = [] for i, o in enumerate(output): + o = o[:filter_dets] for *box, conf, cls in o.cpu().numpy()[:, :6]: targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) return np.array(targets) @@ -1385,4 +1386,4 @@ def visualize(self, images, outputs, out_masks, vis_confs=0.4): masks_images.append(img_masks) # TODO: make this(ori_type stuff) clean images = masks_images[0] if (len(masks_images) == 1) and type(masks_images) != ori_type else images[0] - return self.vis(images, outputs, vis_confs) \ No newline at end of file + return self.vis(images, outputs, vis_confs) From 6f01da0a5a9227b90902f443d0bb1d4f602a0005 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 10:03:26 +0800 Subject: [PATCH 020/247] add object sorting in mask_nms --- utils/segment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/segment.py b/utils/segment.py index 01d2d1cafd9f..4439b862b508 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -109,6 +109,8 @@ def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, class continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + # else: + # x = x[x[:, 4].argsort(descending=True)] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes From fd88f0624159d39d8b0b63f24f3a4431ad0ed43b Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 10:03:35 +0800 Subject: [PATCH 021/247] fix on_train_end --- train_instseg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index b5a307097368..acee896085ca 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -488,7 +488,7 @@ def fitness(x): if is_coco: callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch) - callbacks.run('on_train_end', last, best, plots, epoch, results, masks=True) + callbacks.run('on_train_end', plots, epoch, masks=True) torch.cuda.empty_cache() return results From 529c5401d5c2b371d6909c1d4c874f37d41b2ff1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 14 Jul 2022 09:55:10 +0530 Subject: [PATCH 022/247] support noplots --- train_instseg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_instseg.py b/train_instseg.py index acee896085ca..e147d58f2106 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -169,7 +169,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mask=True, verbose=False, mask_downsample_ratio=mask_ratio, - plots=True + plots=plots ) g = [], [], [] # optimizer parameter groups bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() From afb81468ee61354dc86e8cc7d67a4dd362d82a0a Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 14 Jul 2022 10:31:58 +0530 Subject: [PATCH 023/247] attempt memory leak fix --- evaluator.py | 8 ++------ utils/loggers/__init__.py | 3 +-- utils/plots.py | 3 ++- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/evaluator.py b/evaluator.py index 83b6afc18126..2bbe62f52912 100644 --- a/evaluator.py +++ b/evaluator.py @@ -152,7 +152,7 @@ def run_training(self, model, dataloader, compute_loss=None): # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) - if batch_i < 3: + if self.plots and batch_i < 3: self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. @@ -473,11 +473,7 @@ def plot_images(self, i, img, targets, masks, out, paths): daemon=True, ).start() import wandb if wandb.run: - res = plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), - pred_masks, paths, f, self.names, - max(img.shape[2:])) - res = Image.fromarray(res) - wandb.log({f"pred_{i}": wandb.Image(res)}) + wandb.log({f"pred_{i}": wandb.Image(f)}) def nms(self, **kwargs): return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 24e98ab3304f..22c94d75f23b 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -404,8 +404,7 @@ def on_train_batch_end( ).start() if ni==0: if self.wandb: - res = plot_images_and_masks(imgs, targets, masks, paths) - wandb.log({"train_labels": wandb.Image(res)}) + wandb.log({"train_labels": wandb.Image(f)}) diff --git a/utils/plots.py b/utils/plots.py index f0c9b9ece4d4..f5cd3578929d 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -1300,7 +1300,8 @@ def plot_images_and_masks( mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA ) # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - Image.fromarray(mosaic).save(fname) # PIL save + with Image.fromarray(mosaic) as im: + im.save(fname) return mosaic From ff65f54e475a93c313f8b2f86cdf461005f77a91 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 13:03:15 +0800 Subject: [PATCH 024/247] fix object sorting in mask_nms --- utils/segment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment.py b/utils/segment.py index 4439b862b508..d9773784eafa 100644 --- a/utils/segment.py +++ b/utils/segment.py @@ -109,8 +109,8 @@ def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, class continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - # else: - # x = x[x[:, 4].argsort(descending=True)] # sort by confidence + else: + x = x[x[:, 4].argsort(descending=True)] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes From a8ae73f1be023fb14982e50fd72113eee65614ed Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 13:03:34 +0800 Subject: [PATCH 025/247] update evaluator --- evaluator.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/evaluator.py b/evaluator.py index 83b6afc18126..0cb037b8b6bd 100644 --- a/evaluator.py +++ b/evaluator.py @@ -110,6 +110,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.total_loss = torch.zeros((4 if self.mask else 3)) self.metric = Metrics() if self.mask 
else Metric() + @torch.no_grad() def run_training(self, model, dataloader, compute_loss=None): """This is for evaluation when training.""" self.seen = 0 @@ -389,7 +390,7 @@ def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) - if not self.plots: + if gt_masksi.shape[1:] != pred_maski.shape[1:]: gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", align_corners=False, ).squeeze(0) @@ -458,6 +459,14 @@ def plot_images(self, i, img, targets, masks, out, paths): return # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels + + if masks.shape[1:] != img.shape[2:]: + masks = F.interpolate( + masks.unsqueeze(0), + img.shape[2:], + mode="bilinear", + align_corners=False, + ).squeeze(0) Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), daemon=True, ).start() From 69a116a8b8f05335d9f5b564fb2134b3e5edbe98 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 14 Jul 2022 18:32:31 +0800 Subject: [PATCH 026/247] fix masks==None --- evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluator.py b/evaluator.py index 0cb037b8b6bd..3a547bbbbcc0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -460,7 +460,7 @@ def plot_images(self, i, img, targets, masks, out, paths): # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - if masks.shape[1:] != img.shape[2:]: + if masks is not None and masks.shape[1:] != img.shape[2:]: masks = F.interpolate( masks.unsqueeze(0), img.shape[2:], From fc439519c959c9553078c00875cea22a01176ba2 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 14 Jul 2022 18:20:08 +0530 Subject: [PATCH 027/247] add pdb --- evaluator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/evaluator.py b/evaluator.py index 2bbe62f52912..ecebe74d7170 100644 --- a/evaluator.py +++ b/evaluator.py @@ -153,6 +153,7 @@ def run_training(self, model, dataloader, compute_loss=None): self.compute_stat(pred, pred_maski, labels, gt_masksi) if self.plots and batch_i < 3: + import pdb;pdb.set_trace() self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. From 8bd03e49ffca74796ce616e1e6ebfd685f33999f Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 15 Jul 2022 09:11:30 +0530 Subject: [PATCH 028/247] remove redundant if --- utils/loggers/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 22c94d75f23b..5b229f5a0af2 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -402,7 +402,6 @@ def on_train_batch_end( args=(imgs, targets, masks, paths, f), daemon=True, ).start() - if ni==0: if self.wandb: wandb.log({"train_labels": wandb.Image(f)}) From 04f78b55bdaa5b0fd6b8d6d11cba6337410e6a1c Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 15 Jul 2022 09:40:39 +0530 Subject: [PATCH 029/247] remove pdb --- evaluator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/evaluator.py b/evaluator.py index b6e63f7c3af2..fb4745c27b7f 100644 --- a/evaluator.py +++ b/evaluator.py @@ -154,7 +154,6 @@ def run_training(self, model, dataloader, compute_loss=None): self.compute_stat(pred, pred_maski, labels, gt_masksi) if self.plots and batch_i < 3: - import pdb;pdb.set_trace() self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. 
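[Editor's note: patches 025/026 above resize ground-truth masks to the prediction resolution before scoring, so mask IoU is always computed on same-sized grids. Below is a minimal standalone sketch of that flow; `mask_iou_sketch` and the tensor shapes are hypothetical names for illustration only, not part of the patch series. It mirrors the flattened-mask IoU in utils/segment.py (mask_iou) plus the F.interpolate guard that patch 025 adds to process_batch_masks.

    import torch
    import torch.nn.functional as F

    def mask_iou_sketch(gt_masks, pred_masks, eps=1e-7):
        # gt_masks: (N, Hg, Wg), pred_masks: (M, Hp, Wp), binary {0, 1} tensors
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            # bring GT masks to the prediction resolution, then re-binarize
            gt_masks = F.interpolate(gt_masks[None].float(), pred_masks.shape[1:],
                                     mode="bilinear", align_corners=False)[0].gt_(0.5)
        gt = gt_masks.reshape(gt_masks.shape[0], -1).float()      # (N, Hp*Wp)
        pr = pred_masks.reshape(pred_masks.shape[0], -1).float()  # (M, Hp*Wp)
        inter = gt @ pr.T                                         # (N, M) overlap in pixels
        union = gt.sum(1)[:, None] + pr.sum(1)[None] - inter
        return inter / (union + eps)                              # (N, M) pairwise mask IoU

Resizing the few ground-truth masks, rather than upsampling every predicted mask, keeps evaluation cheap while matching grids, which is why the guard only interpolates gt_masksi.]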
From a8861ca68adb62a860eecb10d90f6c4189a0f4cb Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Fri, 15 Jul 2022 10:47:26 +0530
Subject: [PATCH 030/247] str typecast wandb image

---
 evaluator.py              | 2 +-
 utils/loggers/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluator.py b/evaluator.py
index fb4745c27b7f..0fc6660785aa 100644
--- a/evaluator.py
+++ b/evaluator.py
@@ -482,7 +482,7 @@ def plot_images(self, i, img, targets, masks, out, paths):
             daemon=True, ).start()
         import wandb
         if wandb.run:
-            wandb.log({f"pred_{i}": wandb.Image(f)})
+            wandb.log({f"pred_{i}": wandb.Image(str(f))})

     def nms(self, **kwargs):
         return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs))
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index 5b229f5a0af2..c80c8077baca 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -403,7 +403,7 @@ def on_train_batch_end(
                 daemon=True, ).start()
             if self.wandb:
-                wandb.log({"train_labels": wandb.Image(f)})
+                wandb.log({"train_labels": wandb.Image(str(f))})



From 1cf9fd31a246ba33aeb1ebc48dd81f86aeed0637 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Fri, 15 Jul 2022 11:28:27 +0530
Subject: [PATCH 031/247] fix thread race condition temporarily: allow log

---
 evaluator.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/evaluator.py b/evaluator.py
index 0fc6660785aa..5d92244af5c0 100644
--- a/evaluator.py
+++ b/evaluator.py
@@ -153,7 +153,7 @@ def run_training(self, model, dataloader, compute_loss=None):
         # NOTE: eval in training image-size space
         self.compute_stat(pred, pred_maski, labels, gt_masksi)

-        if self.plots and batch_i < 3:
+        if self.plots and batch_i < 2:
             self.plot_images(batch_i, img, targets, masks, out, paths)

     # compute map and print it.
@@ -477,9 +477,10 @@ def plot_images(self, i, img, targets, masks, out, paths): pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) else: pred_masks = None - Thread(target=plot_images_boxes_and_masks, - args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), - daemon=True, ).start() + plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) + #Thread(target=plot_images_boxes_and_masks, + # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), + # daemon=True, ).start() import wandb if wandb.run: wandb.log({f"pred_{i}": wandb.Image(str(f))}) From 67bae3d67b963592a8cac2a9e619b5177a0dc439 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Fri, 15 Jul 2022 16:18:56 +0800 Subject: [PATCH 032/247] fix ddp issue --- train_instseg.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/train_instseg.py b/train_instseg.py index e147d58f2106..304b86282fa8 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -21,7 +21,6 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from matplotlib.pyplot import plot import numpy as np import torch @@ -58,10 +57,6 @@ from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -71,7 +66,6 @@ from torch.optim import AdamW import yaml from datetime import datetime -from distutils import dist from evaluator import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary @@ -344,7 +338,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: - train_loader.sampler.set_epoch(epoch) + train_loader.batch_sampler.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) if RANK in {-1, 0}: From ff85de368df95e7374cd3c24f1d352778f60a7bd Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 18 Jul 2022 10:44:47 +0530 Subject: [PATCH 033/247] update loss tensor ops --- utils/seg_loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/seg_loss.py b/utils/seg_loss.py index 8ffb4439c2f0..e74b82adae7b 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -210,8 +210,8 @@ def build_targets(self, p, targets): ], device=targets.device, ).float() * g) # offsets for i in range(self.nl): - anchors = self.anchors[i] - gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain @@ -243,7 +243,7 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append((b, a, 
gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class From 2ee7f43f5706d1cd91cbc6f7bfcdb3d88267fcd0 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 12:38:20 +0530 Subject: [PATCH 034/247] attempt to fix eval calculation --- evaluator.py => eval_seg.py | 431 ++++++++++++++++------ train_instseg.py | 4 +- utils/boxes.py | 298 ++++++++++++++++ utils/seg_plots.py | 689 ++++++++++++++++++++++++++++++++++++ 4 files changed, 1307 insertions(+), 115 deletions(-) rename evaluator.py => eval_seg.py (66%) create mode 100644 utils/boxes.py create mode 100644 utils/seg_plots.py diff --git a/evaluator.py b/eval_seg.py similarity index 66% rename from evaluator.py rename to eval_seg.py index 5d92244af5c0..24f2e40b95db 100644 --- a/evaluator.py +++ b/eval_seg.py @@ -1,12 +1,3 @@ -# TODO: Optimize plotting, losses & merge with val.py - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Validate a trained YOLOv5 model accuracy on a custom dataset - -Usage: - $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 -""" import json from pathlib import Path @@ -15,18 +6,33 @@ import numpy as np import torch import torch.nn.functional as F -from PIL import Image # import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader -from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) -from utils.general import (check_dataset, check_img_size, check_suffix, ) -from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) -from utils.plots import output_to_target, plot_images_boxes_and_masks +from utils.general import ( + coco80_to_coco91_class, + increment_path, + colorstr, check_dataset, check_img_size, check_suffix +) + +from utils.segment import ( + non_max_suppression_masks, + mask_iou, + process_mask, + process_mask_upsample, + scale_masks, +) +from utils.boxes import ( + box_iou, + non_max_suppression, + scale_coords, + xyxy2xywh, + xywh2xyxy, +) from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) +from utils.seg_plots import output_to_target, plot_images_boxes_and_masks from utils.torch_utils import select_device, time_sync @@ -34,7 +40,9 @@ def save_one_txt(predn, save_conf, shape, file): # Save one txt result gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh + xywh = ( + (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() + ) # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(file, "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") @@ -48,13 +56,20 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): if pred_masks is not None: pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] + rles = [ + mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in pred_masks + ] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), 
box.tolist())): - pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), } + pred_dict = { + "image_id": image_id, + "category_id": class_map[int(p[5])], + "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), + } if pred_masks is not None: pred_dict["segmentation"] = rles[i] jdict.append(pred_dict) @@ -62,9 +77,25 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): @torch.no_grad() class Yolov5Evaluator: - def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, - project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - max_plot_dets=10, mask=False, mask_downsample_ratio=1, ) -> None: + def __init__( + self, + data, + conf_thres=0.001, + iou_thres=0.6, + device="", + single_cls=False, + augment=False, + verbose=False, + project="runs/val", + name="exp", + exist_ok=False, + half=True, + save_dir=Path(""), + nosave=False, + plots=True, + mask=False, + mask_downsample_ratio=1, + ) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -79,7 +110,6 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.save_dir = save_dir self.nosave = nosave self.plots = plots - self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio @@ -89,14 +119,40 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.confusion_matrix = ConfusionMatrix(nc=self.nc) self.dt = [0.0, 0.0, 0.0] self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = (("%20s" + "%11s" * 10) % ( - "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", - "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( - "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) + self.s = ( + ("%20s" + "%11s" * 10) + % ( + "Class", + "Images", + "Labels", + "Box:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + "Mask:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + ) + if self.mask + else ("%20s" + "%11s" * 6) + % ( + "Class", + "Images", + "Labels", + "P", + "R", + "mAP@.5", + "mAP@.5:.95", + ) + ) # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( - "coco/val2017.txt") # COCO dataset + self.is_coco = isinstance(self.data.get("val"), str) and self.data[ + "val" + ].endswith( + "coco/val2017.txt" + ) # COCO dataset self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.jdict = [] self.iou_thres = 0.65 if self.is_coco else self.iou_thres @@ -110,7 +166,6 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.total_loss = torch.zeros((4 if self.mask else 3)) self.metric = Metrics() if self.mask else Metric() - @torch.no_grad() def run_training(self, model, dataloader, compute_loss=None): """This is for evaluation when training.""" self.seen = 0 @@ -124,7 +179,9 @@ def run_training(self, model, dataloader, compute_loss=None): # inference # masks will be `None` if training objection. 
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -143,17 +200,20 @@ def run_training(self, model, dataloader, compute_loss=None):

                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(pred, proto_out,
-                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
-                    self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
+                    self.pred_masks.append(pred_maski.cpu())

                 # NOTE: eval in training image-size space
                 self.compute_stat(pred, pred_maski, labels, gt_masksi)

-            if self.plots and batch_i < 2:
+            if self.plots and batch_i < 3:
                 self.plot_images(batch_i, img, targets, masks, out, paths)

         # compute map and print it.
@@ -161,12 +221,29 @@ def run_training(self, model, dataloader, compute_loss=None):

         # Return results
         model.float()  # for training
-        return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
-                self.metric.get_maps(self.nc), t,)
-
-    def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ):
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def run(
+        self,
+        weights,
+        batch_size,
+        imgsz,
+        save_txt=False,
+        save_conf=False,
+        save_json=False,
+        task="val",
+    ):
         """This is for native evaluation."""
-        model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task)
+        model, dataloader, imgsz = self.before_infer(
+            weights, batch_size, imgsz, save_txt, task
+        )
         self.seen = 0
         # self.iouv.to(self.device)
         self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
@@ -175,7 +252,9 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_
         model.eval()

         # inference
-        for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
             # reset pred_masks
             self.pred_masks = []
             img = img.to(self.device, non_blocking=True)
@@ -197,12 +276,15 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_

                 # get prediction masks
                 proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
-                pred_maski = self.get_predmasks(pred, proto_out,
-                                                gt_masksi.shape[1:] if gt_masksi is not None else None, )
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )

                 # for visualization
                 if self.plots and batch_i < 3 and pred_maski is not None:
-                    self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
+                    self.pred_masks.append(pred_maski.cpu())

                 # NOTE: eval in training image-size space
                 self.compute_stat(pred, pred_maski, labels, gt_masksi)
@@ -215,21 +297,36 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_
                 # clone() so that plot_images works correctly
                 predn = pred.clone()
                 # test-time loading adds 0.5 padding, which differs from the dataloader padding, so ratio_pad must be passed in
-                scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad)  # native-space pred
-
+                scale_coords(
+                    img[si].shape[1:], predn[:, :4], shape, ratio_pad
+                )  # native-space pred

                 # Save/log
                 if save_txt and self.save_dir.exists():
                     # NOTE: convert coords to native space when save txt.
                     # only box predictions are saved
-                    save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), )
+                    save_one_txt(
+                        predn,
+                        save_conf,
+                        shape,
+                        file=self.save_dir / "labels" / (path.stem + ".txt"),
+                    )

                 if save_json and self.save_dir.exists():
                     # NOTE: convert coords to native space when save json.
                     # if pred_maski is not None:
                     # h, w, n
-                    pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
-                                             shape, ratio_pad, )
+                    pred_maski = scale_masks(
+                        img[si].shape[1:],
+                        pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
+                        shape,
+                        ratio_pad,
+                    )
-                    save_one_json(predn, self.jdict, path, self.class_map,
-                                  pred_maski, )  # append to COCO-JSON dictionary
+                    save_one_json(
+                        predn,
+                        self.jdict,
+                        path,
+                        self.class_map,
+                        pred_maski,
+                    )  # append to COCO-JSON dictionary

             if self.plots and batch_i < 3:
                 self.plot_images(batch_i, img, targets, masks, out, paths)
@@ -246,24 +343,42 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_

         # Print speeds
         shape = (batch_size, 3, imgsz, imgsz)
-        print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)
+        print(
+            f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}"
+            % t
+        )
         s = (
-            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "")
-        print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}")
+            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}"
+            if save_txt and self.save_dir.exists()
+            else ""
+        )
+        print(
+            f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}"
+        )

         # Return results
-        return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
-                self.metric.get_maps(self.nc), t,)
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )

     def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"):
         "prepare for evaluation without training."
self.device = select_device(self.device, batch_size=batch_size) # Directories - self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run + self.save_dir = increment_path( + Path(self.project) / self.name, exist_ok=self.exist_ok + ) # increment run if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir + (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( + parents=True, exist_ok=True + ) # make dir # Load model check_suffix(weights, ".pt") @@ -273,11 +388,27 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Data if self.device.type != "cpu": - model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once + model( + torch.zeros(1, 3, imgsz, imgsz) + .to(self.device) + .type_as(next(model.parameters())) + ) # run once pad = 0.0 if task == "speed" else 0.5 - task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images - dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, - prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] + task = ( + task if task in ("train", "val", "test") else "val" + ) # path to train/val/test images + dataloader = create_dataloader( + self.data[task], + imgsz, + batch_size, + gs, + self.single_cls, + pad=pad, + rect=True, + prefix=colorstr(f"{task}: "), + mask_head=self.mask, + mask_downsample_ratio=self.mask_downsample_ratio, + )[0] return model, dataloader, imgsz def inference(self, model, img, targets, masks=None, compute_loss=None): @@ -290,18 +421,29 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): self.dt[0] += t2 - t1 # Run model - out, train_out = model(img, augment=self.augment) # inference and training outputs + out, train_out = model( + img, augment=self.augment + ) # inference and training outputs self.dt[1] += time_sync() - t2 # Compute loss if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls + self.total_loss += compute_loss(train_out, targets, masks)[ + 1 + ] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels + targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( + self.device + ) # to pixels t3 = time_sync() - out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, - agnostic=self.single_cls, ) + out = self.nms( + prediction=out, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + multi_label=True, + agnostic=self.single_cls, + ) self.dt[2] += time_sync() - t3 return out, train_out @@ -312,18 +454,25 @@ def after_infer(self): """ # Plot confusion matrix if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) + self.confusion_matrix.plot( + save_dir=self.save_dir, names=list(self.names.values()) + ) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy box_or_mask_any = stats[0].any() or stats[1].any() stats = stats[1:] if not self.mask else stats if len(stats) and box_or_mask_any: - results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, - self.names, ) + results = self.ap_per_class( + *stats, + self.plots, + self.save_dir if self.save_dir.exists() else None, + 
self.names, + ) self.metric.update(results) - nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), - minlength=self.nc) # number of targets per class + nt = np.bincount( + stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc + ) # number of targets per class else: nt = torch.zeros(1) @@ -343,13 +492,19 @@ def process_batch(self, detections, labels, iouv): Returns: correct (Array[N, 10]), for 10 IoU levels """ - correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) + correct = torch.zeros( + detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device + ) iou = box_iou(labels[:, 1:], detections[:, :4]) x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match + (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) + ) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -377,29 +532,53 @@ def get_predmasks(self, pred, proto_out, gt_shape): if proto_out is None or len(pred) == 0: return None process = process_mask_upsample if self.plots else process_mask - gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) + gt_shape = ( + gt_shape[0] * self.mask_downsample_ratio, + gt_shape[1] * self.mask_downsample_ratio, + ) # n, h, w - pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) + pred_mask = ( + process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) + .permute(2, 0, 1) + .contiguous() + ) return pred_mask def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ((pred_maski is None) ^ ( - gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." + assert not ( + (pred_maski is None) ^ (gt_masksi is None) + ), "`proto_out` and `gt_masksi` should be both None or both exist." 
if pred_maski is None and gt_masksi is None: return torch.zeros(0, self.niou, dtype=torch.bool) - correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) - - if gt_masksi.shape[1:] != pred_maski.shape[1:]: - gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", - align_corners=False, ).squeeze(0) + correct = torch.zeros( + predn.shape[0], + self.iouv.shape[0], + dtype=torch.bool, + device=self.iouv.device, + ) + + if not self.plots: + gt_masksi = F.interpolate( + gt_masksi.unsqueeze(0), + pred_maski.shape[1:], + mode="bilinear", + align_corners=False, + ).squeeze(0) - iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), ) + iou = mask_iou( + gt_masksi.view(gt_masksi.shape[0], -1), + pred_maski.view(pred_maski.shape[0], -1), + ) x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) + ) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) + .cpu() + .numpy() + ) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -416,9 +595,15 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): if len(predn) == 0: if nl: - self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), torch.Tensor(), tcls,)) + self.stats.append( + ( + torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), + torch.Tensor(), + tcls, + ) + ) return # Predictions @@ -433,15 +618,24 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): correct_boxes = self.process_batch(predn, labelsn, self.iouv) # masks - correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn) + correct_masks = self.process_batch_masks( + predn, pred_maski, gt_maski, labelsn + ) if self.plots: self.confusion_matrix.process_batch(predn, labelsn) else: correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(), - tcls,)) # (correct, conf, pcls, tcls) + self.stats.append( + ( + correct_masks.cpu(), + correct_boxes.cpu(), + predn[:, 4].cpu(), + predn[:, 5].cpu(), + tcls, + ) + ) # (correct, conf, pcls, tcls) def print_metric(self, nt, stats): # Print results @@ -452,41 +646,52 @@ def print_metric(self, nt, stats): # TODO: self.seen support verbose. 
if self.verbose and self.nc > 1 and len(stats): for i, c in enumerate(self.metric.ap_class_index): - print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) + print( + pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) + ) def plot_images(self, i, img, targets, masks, out, paths): if not self.save_dir.exists(): return # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - - if masks is not None and masks.shape[1:] != img.shape[2:]: - masks = F.interpolate( - masks.unsqueeze(0), - img.shape[2:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, ).start() + Thread( + target=plot_images_boxes_and_masks, + args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), + daemon=True, + ).start() f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions # plot predition if len(self.pred_masks): - pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) + pred_masks = ( + torch.cat(self.pred_masks, dim=0) + if len(self.pred_masks) > 1 + else self.pred_masks[0] + ) else: pred_masks = None - plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) - #Thread(target=plot_images_boxes_and_masks, - # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), - # daemon=True, ).start() - import wandb - if wandb.run: - wandb.log({f"pred_{i}": wandb.Image(str(f))}) + Thread( + target=plot_images_boxes_and_masks, + args=( + img, + output_to_target(out), + pred_masks, + paths, + f, + self.names, + max(img.shape[2:]), + ), + daemon=True, + ).start() def nms(self, **kwargs): - return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) + return ( + non_max_suppression_masks(**kwargs) + if self.mask + else non_max_suppression(**kwargs) + ) def ap_per_class(self, *args): return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) diff --git a/train_instseg.py b/train_instseg.py index 304b86282fa8..f5ef6b15a580 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -54,7 +54,7 @@ from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.seg_loss import ComputeLoss #from utils.metrics import fitness -from utils.plots import plot_evolve, plot_labels +from utils.seg_plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -66,7 +66,7 @@ from torch.optim import AdamW import yaml from datetime import datetime -from evaluator import Yolov5Evaluator +from eval_seg import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) diff --git a/utils/boxes.py b/utils/boxes.py new file mode 100644 index 000000000000..1881dde83c81 --- /dev/null +++ b/utils/boxes.py @@ -0,0 +1,298 @@ +import time + +import cv2 +import numpy as np +import torch +import torchvision + +from utils.general import clip_coords, scale_coords, xywh2xyxy, xyxy2xywh +from .general import increment_path +from .metrics import box_iou + + +def nms_numpy(boxes, scores, class_id, threshold, method=None, agnostic=False): + """ + :param boxes: numpy(N, 4), xyxy + :param scores: numpy(N, ) + :param class_id: numpy(N, ) + :param threshold: float + :param 
method: optional, "Min" divides the overlap by the smaller box area instead of the union
+    :return: indices of kept boxes
+    """
+    if boxes.size == 0:
+        return np.empty((0,), dtype=np.int8)
+    max_wh = 4096
+    if isinstance(boxes, torch.Tensor):
+        boxes = boxes.cpu().numpy()
+    if isinstance(scores, torch.Tensor):
+        scores = scores.cpu().numpy()
+    if isinstance(class_id, torch.Tensor):
+        class_id = class_id.cpu().numpy()
+
+    if boxes.ndim == 1:
+        boxes = boxes[None, :]
+    assert boxes.shape[1] == 4, f"expected boxes shape [N, 4], but got {boxes.shape}"
+    if len(class_id.shape) == 1:
+        class_id = class_id[:, None]
+
+    assert (boxes.shape[0] == class_id.shape[0] == scores.shape[0]), "boxes, class_id and scores shapes must be equal"
+
+    c = class_id * (0 if agnostic else max_wh)
+    boxes = boxes + c
+    x1 = boxes[:, 0].copy()
+    y1 = boxes[:, 1].copy()
+    x2 = boxes[:, 2].copy()
+    y2 = boxes[:, 3].copy()
+
+    s = scores
+    area = (x2 - x1 + 1) * (y2 - y1 + 1)
+
+    I = np.argsort(s)  # indices that sort scores in ascending order
+    pick = np.zeros_like(s, dtype=np.int16)
+    counter = 0
+    while I.size > 0:
+        i = I[-1]
+        pick[counter] = i
+        counter += 1
+        idx = I[0:-1]
+
+        xx1 = np.maximum(x1[i], x1[idx]).copy()
+        yy1 = np.maximum(y1[i], y1[idx]).copy()
+        xx2 = np.minimum(x2[i], x2[idx]).copy()
+        yy2 = np.minimum(y2[i], y2[idx]).copy()
+
+        w = np.maximum(0.0, xx2 - xx1 + 1).copy()
+        h = np.maximum(0.0, yy2 - yy1 + 1).copy()
+
+        inter = w * h
+        if method == "Min":
+            o = inter / np.minimum(area[i], area[idx])
+        else:
+            o = inter / (area[i] + area[idx] - inter)
+        I = I[np.where(o <= threshold)]
+
+    pick = pick[:counter].copy()
+    return pick
+
+
+def save_one_box(xyxy, im, file="image.jpg", gain=1.02, pad=10, square=False, BGR=False, save=True):
+    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
+    xyxy = torch.tensor(xyxy).view(-1, 4)
+    b = xyxy2xywh(xyxy)  # boxes
+    if square:
+        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
+    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
+    xyxy = xywh2xyxy(b).long()
+    clip_coords(xyxy, im.shape)
+    crop = im[int(xyxy[0, 1]): int(xyxy[0, 3]), int(xyxy[0, 0]): int(xyxy[0, 2]), :: (1 if BGR else -1), ]
+    if save:
+        cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix(".jpg")), crop)
+    return crop
+
+
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
+                        labels=(), max_det=300, ):
+    """Runs Non-Maximum Suppression (NMS) on inference results
+
+    Returns:
+         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
+    """
+
+    nc = prediction.shape[2] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
+    assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
+
+    # Settings
+    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 10.0  # seconds to quit after
+    redundant = True  # require redundant detections
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+    merge = False  # use merge-NMS
+
+    t = time.time()
+    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            l = labels[xi]
+            v = torch.zeros((len(l), nc + 5), device=x.device)
+            v[:, :4] = l[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
+            x = torch.cat((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+        else:  # best class only
+            conf, j = x[:, 5:].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Apply finite constraint
+        # if not torch.isfinite(x).all():
+        #     x = x[torch.isfinite(x).all(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(f"WARNING: NMS time limit {time_limit}s exceeded")
+            break  # time limit exceeded
+
+    return output
+
+
+def non_max_suppression_numpy(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False,
+                              multi_label=False, labels=(), max_det=300, ):
+    """Runs Non-Maximum Suppression (NMS) on inference results
+
+    Returns:
+         list of detections, on (n,6) array per image [xyxy, conf, cls]
+    """
+
+    nc = prediction.shape[2] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
+    assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
+
+    # Settings
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 10.0  # seconds to quit after
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+
+    t = time.time()
+    output = [np.zeros((0, 6))] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            l = labels[xi]
+            v = np.zeros((len(l), nc + 5), dtype=np.float32)
+            v[:, :4] = l[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(l)), l[:, 0].astype(int) + 5] = 1.0  # cls
+            x = np.concatenate((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = np.nonzero(x[:, 5:] > conf_thres)
+            x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(np.float32)), 1)
+        else:  # best class only
+            conf, j = x[:, 5:].max(1), x[:, 5:].argmax(1)
+            x = np.concatenate((box, conf[:, None], j.astype(np.float32)[:, None]), 1)[conf > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == np.array(classes)).any(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence, descending
+
+        # Batched NMS
+        boxes, scores, cls = x[:, :4], x[:, 4], x[:, 5]
+        i = nms_numpy(boxes, scores, cls, iou_thres, agnostic=agnostic)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+
+        output[xi] = x[i][None, :] if x[i].ndim == 1 else x[i]
+        if (time.time() - t) > time_limit:
+            print(f"WARNING: NMS time limit {time_limit}s exceeded")
+            break  # time limit exceeded
+
+    return output
+
+
+def apply_classifier(x, model, img, im0):
+    # Apply a second stage classifier to yolo outputs
+    im0 = [im0] if isinstance(im0, np.ndarray) else im0
+    for i, d in enumerate(x):  # per image
+        if d is not None and len(d):
+            d = d.clone()
+
+            # Reshape and pad cutouts
+            b = xyxy2xywh(d[:, :4])  # boxes
+            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
+            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
+            d[:, :4] = xywh2xyxy(b).long()
+
+            # Rescale boxes from img_size to im0 size
+            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
+
+            # Classes
+            pred_cls1 = d[:, 5].long()
+            ims = []
+            for j, a in enumerate(d):  # per item
+                cutout = im0[i][int(a[1]): int(a[3]), int(a[0]): int(a[2])]
+                im = cv2.resize(cutout, (224, 224))  # BGR
+                # cv2.imwrite('example%i.jpg' % j, cutout)
+
+                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+                im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
+                im /= 255.0  # 0 - 255 to 0.0 - 1.0
+                ims.append(im)
+
+            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
+            x[i] = x[i][pred_cls1 == pred_cls2]  # retain matching class detections
+
+    return x
diff --git a/utils/seg_plots.py b/utils/seg_plots.py
new file mode 100644
index 000000000000..3f09d2ad272c
--- /dev/null
+++ b/utils/seg_plots.py
@@ -0,0 +1,689 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+Plotting utils
+"""
+
+import math
+import os
+from copy import copy
+from itertools import repeat
+from pathlib import Path
+
+import cv2
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sn
+import torch
+from PIL import Image, ImageDraw
+
+from utils.general import check_font, is_ascii, is_chinese
+from utils.seg_metrics import fitness
+from .boxes import xywh2xyxy, xyxy2xywh
+
+# Settings
+RANK = int(os.getenv("RANK", -1))
+matplotlib.rc("font", **{"size": 11})
+matplotlib.use("Agg")  # for writing to files only
+
+
+class Colors:
+    # Ultralytics color palette https://ultralytics.com/
+    def __init__(self):
+        # hex = matplotlib.colors.TABLEAU_COLORS.values()
+        hex = ("FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB",
+               "2C99A8", "00C2FF", "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",)
+        self.palette = [self.hex2rgb("#" + c) for c in hex]
+        self.n = 
len(self.palette) + + def __call__(self, i, bgr=False): + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): # rgb order (PIL) + return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4)) + + +colors = Colors() # create instance for 'from utils.plots import colors' + + +class Annotator: + if RANK in (-1, 0): + check_font() # download TTF if necessary + + # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations + def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=False, example="abc", ): + assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." + self.pil = pil or not is_ascii(example) or is_chinese(example) + if self.pil: # use PIL + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + self.font = check_font(font="Arial.Unicode.ttf" if is_chinese(example) else font, + size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12), ) + else: # use cv2 + self.im = im + self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width + + def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): + # Add one xyxy box to image with label + if self.pil or not is_ascii(label): + self.draw.rectangle(box, width=self.lw, outline=color) # box + if label: + w, h = self.font.getsize(label) # text width, height + outside = box[1] - h >= 0 # label fits outside box + self.draw.rectangle([box[0], box[1] - h if outside else box[1], box[0] + w + 1, + box[1] + 1 if outside else box[1] + h + 1, ], fill=color, ) + # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 + self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font, ) + else: # cv2 + p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) + if label: + tf = max(self.lw - 1, 1) # font thickness + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height + outside = p1[1] - h - 3 >= 0 # label fits outside box + p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 + cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled + cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, + thickness=tf, lineType=cv2.LINE_AA, ) + + def rectangle(self, xy, fill=None, outline=None, width=1): + # Add rectangle to image (PIL-only) + self.draw.rectangle(xy, fill, outline, width) + + def text(self, xy, text, txt_color=(255, 255, 255)): + # Add text to image (PIL-only) + w, h = self.font.getsize(text) # text width, height + self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + + def result(self): + # Return annotated image as array + return np.asarray(self.im) + + +class Visualizer(object): + """Visualization of one model.""" + + def __init__(self, names) -> None: + super().__init__() + self.names = names + + def draw_one_img(self, img, output, vis_conf=0.4): + """Visualize one images. + + Args: + imgs (numpy.ndarray): one image. + outputs (torch.Tensor): one output, (num_boxes, classes+5) + vis_confs (float, optional): Visualize threshold. + Return: + img (numpy.ndarray): Image after visualization. 
+ """ + if isinstance(output, list): + output = output[0] + if output is None or len(output) == 0: + return img + for (*xyxy, conf, cls) in reversed(output[:, :6]): + if conf < vis_conf: + continue + label = '%s %.2f' % (self.names[int(cls)], conf) + color = colors(int(cls)) + plot_one_box(xyxy, img, label=label, color=color, line_thickness=2) + return img + + def draw_multi_img(self, imgs, outputs, vis_confs=0.4): + """Visualize multi images. + + Args: + imgs (List[numpy.array]): multi images. + outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. + vis_confs (float | tuple[float], optional): Visualize threshold. + Return: + imgs (List[numpy.ndarray]): Images after visualization. + """ + if isinstance(vis_confs, float): + vis_confs = list(repeat(vis_confs, len(imgs))) + assert len(imgs) == len(outputs) == len(vis_confs) + for i, output in enumerate(outputs): # detections per image + self.draw_one_img(imgs[i], output, vis_confs[i]) + return imgs + + def draw_imgs(self, imgs, outputs, vis_confs=0.4): + if isinstance(imgs, np.ndarray): + return self.draw_one_img(imgs, outputs, vis_confs) + else: + return self.draw_multi_img(imgs, outputs, vis_confs) + + def __call__(self, imgs, outputs, vis_confs=0.4): + return self.draw_imgs(imgs, outputs, vis_confs) + + +def hist2d(x, y, n=100): + # 2d histogram used in labels.png and evolve.png + xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) + hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) + xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) + yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) + return np.log(hist[xidx, yidx]) + + +def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): + from scipy.signal import butter, filtfilt + + # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy + def butter_lowpass(cutoff, fs, order): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + return butter(order, normal_cutoff, btype="low", analog=False) + + b, a = butter_lowpass(cutoff, fs, order=order) + return filtfilt(b, a, data) # forward-backward filter + + +def output_to_target(output): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + targets = [] + for i, o in enumerate(output): + for *box, conf, cls in o.cpu().numpy()[:, :6]: + targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + return np.array(targets) + + +def plot_images(images, targets, paths=None, fname="images.jpg", names=None, max_size=1920, max_subplots=16, ): + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if np.max(images[0]) <= 1: + images *= 255.0 # de-normalise (optional) + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y: y + h, x: x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # 
Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220), ) # filenames + if len(targets) > 0: + ti = targets[targets[:, 0] == i] # image targets + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype("int") + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" + annotator.box_label(box, label, color=color) + annotator.im.save(fname) # save + return annotator.result() + + +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): + # Plot LR simulating training for full epochs + optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + y = [] + for _ in range(epochs): + scheduler.step() + y.append(optimizer.param_groups[0]["lr"]) + plt.plot(y, ".-", label="LR") + plt.xlabel("epoch") + plt.ylabel("LR") + plt.grid() + plt.xlim(0, epochs) + plt.ylim(0) + plt.savefig(Path(save_dir) / "LR.png", dpi=200) + plt.close() + + +def plot_val_txt(): # from utils.plots import *; plot_val() + # Plot val.txt histograms + x = np.loadtxt("val.txt", dtype=np.float32) + box = xyxy2xywh(x[:, :4]) + cx, cy = box[:, 0], box[:, 1] + + fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) + ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) + ax.set_aspect("equal") + plt.savefig("hist2d.png", dpi=300) + + fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) + ax[0].hist(cx, bins=600) + ax[1].hist(cy, bins=600) + plt.savefig("hist1d.png", dpi=200) + + +def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() + # Plot targets.txt histograms + x = np.loadtxt("targets.txt", dtype=np.float32).T + s = ["x targets", "y targets", "width targets", "height targets"] + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + for i in range(4): + ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % (x[i].mean(), x[i].std())) + ax[i].legend() + ax[i].set_title(s[i]) + plt.savefig("targets.jpg", dpi=200) + + +def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() + # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) + save_dir = Path(file).parent if file else Path(dir) + plot2 = False # plot additional results + if plot2: + ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() + + fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) + # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: + for f in sorted(save_dir.glob("study*.txt")): + y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T + x = np.arange(y.shape[1]) if x is None 
else np.array(x) + if plot2: + s = ["P", "R", "mAP@.5", "mAP@.5:.95", "t_preprocess (ms/img)", "t_inference (ms/img)", "t_NMS (ms/img)", ] + for i in range(7): + ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) + ax[i].set_title(s[i]) + + j = y[3].argmax() + 1 + ax2.plot(y[5, 1:j], y[3, 1:j] * 1e2, ".-", linewidth=2, markersize=8, + label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), ) + + ax2.plot(1e3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], "k.-", linewidth=2, + markersize=8, alpha=0.25, label="EfficientDet", ) + + ax2.grid(alpha=0.2) + ax2.set_yticks(np.arange(20, 60, 5)) + ax2.set_xlim(0, 57) + ax2.set_ylim(25, 55) + ax2.set_xlabel("GPU Speed (ms/img)") + ax2.set_ylabel("COCO AP val") + ax2.legend(loc="lower right") + f = save_dir / "study.png" + print(f"Saving {f}...") + plt.savefig(f, dpi=300) + + +def plot_labels(labels, names=(), save_dir=Path("")): + # plot dataset labels + print("Plotting labels... ") + c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes + nc = int(c.max() + 1) # number of classes + x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) + + # seaborn correlogram + sn.pairplot(x, corner=True, diag_kind="auto", kind="hist", diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9), ) + plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) + plt.close() + + # matplotlib labels + matplotlib.use("svg") # faster + ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() + y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) + # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 + ax[0].set_ylabel("instances") + if 0 < len(names) < 30: + ax[0].set_xticks(range(len(names))) + ax[0].set_xticklabels(names, rotation=90, fontsize=10) + else: + ax[0].set_xlabel("classes") + sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) + sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) + + # rectangles + labels[:, 1:3] = 0.5 # center + labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 + img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) + for cls, *box in labels[:1000]: + ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot + ax[1].imshow(img) + ax[1].axis("off") + + for a in [0, 1, 2, 3]: + for s in ["top", "right", "left", "bottom"]: + ax[a].spines[s].set_visible(False) + + plt.savefig(save_dir / "labels.jpg", dpi=200) + matplotlib.use("Agg") + plt.close() + + +def profile_idetection(start=0, stop=0, labels=(), save_dir=""): + # Plot iDetection '*.txt' per-image logs. 
from utils.plots import *; profile_idetection() + ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() + s = ["Images", "Free Storage (GB)", "RAM Usage (GB)", "Battery", "dt_raw (ms)", "dt_smooth (ms)", + "real-world FPS", ] + files = list(Path(save_dir).glob("frames*.txt")) + for fi, f in enumerate(files): + try: + results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows + n = results.shape[1] # number of rows + x = np.arange(start, min(stop, n) if stop else n) + results = results[:, x] + t = results[0] - results[0].min() # set t0=0s + results[0] = x + for i, a in enumerate(ax): + if i < len(results): + label = labels[fi] if len(labels) else f.stem.replace("frames_", "") + a.plot(t, results[i], marker=".", label=label, linewidth=1, markersize=5, ) + a.set_title(s[i]) + a.set_xlabel("time (s)") + # if fi == len(files) - 1: + # a.set_ylim(bottom=0) + for side in ["top", "right"]: + a.spines[side].set_visible(False) + else: + a.remove() + except Exception as e: + print("Warning: Plotting error for %s; %s" % (f, e)) + ax[1].legend() + plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) + + +def plot_evolve(evolve_csv="path/to/evolve.csv", ): # from utils.plots import *; plot_evolve() + # Plot evolve.csv hyp evolution results + evolve_csv = Path(evolve_csv) + data = pd.read_csv(evolve_csv) + keys = [x.strip() for x in data.columns] + x = data.values + f = fitness(x) + j = np.argmax(f) # max fitness index + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc("font", **{"size": 8}) + for i, k in enumerate(keys[7:]): + v = x[:, 7 + i] + mu = v[j] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") + plt.plot(mu, f.max(), "k+", markersize=15) + plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print("%15s: %.3g" % (k, mu)) + f = evolve_csv.with_suffix(".png") # filename + plt.savefig(f, dpi=200) + plt.close() + print(f"Saved {f}") + + +def plot_results(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
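+    # NOTE: the "best" epoch below is selected by the standard YOLOv5 fitness score,
+    # fitness = 0.9 * mAP@0.5:0.95 + 0.1 * mAP@0.5 (columns 7 and 6 of results.csv,
+    # assuming the usual column order: epoch, train losses, P, R, mAP@0.5, mAP@0.5:0.95, ...)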
+ for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[ + j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax( + 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 0.1 * data.values[:, + 11], ) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[ + j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + import random + + # Plots one bounding box on image img + tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA, ) + + +def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): + """ + x: Features to be visualized + module_type: Module type + stage: Module stage within model + n: Maximum number of feature maps to plot + save_dir: Directory to save results + """ + 
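+    # Usage sketch (illustrative; names assumed from models/yolo.py): typically
+    # called once per layer during a forward pass, e.g. feature_visualization(x, m.type, m.i)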
if "Detect" not in module_type: + batch, channels, height, width = x.shape # batch, channels, height, width + if height > 1 and width > 1: + f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename + + blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels + n = min(n, channels) # number of plots + fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols + ax = ax.ravel() + plt.subplots_adjust(wspace=0.05, hspace=0.05) + for i in range(n): + ax[i].imshow(blocks[i].squeeze()) # cmap='gray' + ax[i].axis("off") + + print(f"Saving {save_dir / f}... ({n}/{channels})") + plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") + plt.close() + + +def plot_images_and_masks(images, targets, masks, paths=None, fname="images.jpg", names=None, max_size=640, + max_subplots=16, ): + # Plot image grid with labels + # print("targets:", targets.shape) + # print("masks:", masks.shape) + # print('--------------------------') + + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if isinstance(masks, torch.Tensor): + masks = masks.cpu().numpy() + masks = masks.astype(int) + + # un-normalise + if np.max(images[0]) <= 1: + images *= 255 + + tl = 3 # line thickness + tf = max(tl - 1, 1) # font thickness + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Check if we should resize + scale_factor = max_size / max(h, w) + if scale_factor < 1: + h = math.ceil(scale_factor * h) + w = math.ceil(scale_factor * w) + + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, img in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + + block_x = int(w * (i // ns)) + block_y = int(h * (i % ns)) + + img = img.transpose(1, 2, 0) + if scale_factor < 1: + img = cv2.resize(img, (w, h)) + + mosaic[block_y: block_y + h, block_x: block_x + w, :] = img + if len(targets) > 0: + idx = (targets[:, 0]).astype(int) + image_targets = targets[idx == i] + # print(targets.shape) + # print(masks.shape) + image_masks = masks[idx == i] + # mosaic_masks + # mosaic_masks[block_y:block_y + h, + # block_x:block_x + w, :] = image_masks + boxes = xywh2xyxy(image_targets[:, 2:6]).T + classes = image_targets[:, 1].astype("int") + labels = image_targets.shape[1] == 6 # labels if no conf column + conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale_factor < 1: # absolute coords need scale if image scales + boxes *= scale_factor + boxes[[0, 2]] += block_x + boxes[[1, 3]] += block_y + for j, box in enumerate(boxes.T): + cls = int(classes[j]) + color = colors(cls) + cls = names[cls] if names else cls + mask = image_masks[j].astype(np.bool) + # print(mask.shape) + # print(mosaic.shape) + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) + plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) + mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] = \ + mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) + + # Draw image filename labels + if paths: + label = 
Path(paths[i]).name[:40] # trim to 40 char + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, + lineType=cv2.LINE_AA, ) + + # Image border + cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3, ) + + if fname: + r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) + # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save + Image.fromarray(mosaic).save(fname) # PIL save + return mosaic + + +def plot_images_boxes_and_masks(images, targets, masks=None, paths=None, fname="images.jpg", names=None, max_size=640, + max_subplots=16, ): + if masks is not None: + return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots) + else: + return plot_images(images, targets, paths, fname, names, max_size, max_subplots) + + +def plot_masks(img, masks, colors, alpha=0.5): + """ + Args: + img (tensor): img on cuda, shape: [3, h, w], range: [0, 1] + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] + Return: + img after draw masks, shape: [h, w, 3] + + transform colors and send img_gpu to cpu for the most time. + """ + img_gpu = img.clone() + num_masks = len(masks) + # [n, 1, 1, 3] + # faster this way to transform colors + colors = torch.tensor(colors, device=img.device).float() / 255.0 + colors = colors[:, None, None, :] + # [n, h, w, 1] + masks = masks[:, :, :, None] + masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha + inv_alph_masks = masks * (-alpha) + 1 + masks_color_summand = masks_color[0] + if num_masks > 1: + inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) + masks_color_cumul = masks_color[1:] * inv_alph_cumul + masks_color_summand += masks_color_cumul.sum(dim=0) + + # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] + img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv + img_gpu = img_gpu.permute(1, 2, 0).contiguous() + # [h, w, 3] + img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand + return (img_gpu * 255).byte().cpu().numpy() From 16a0163bd67232102c9ba5d1dda058119bf52c44 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Tue, 19 Jul 2022 15:32:54 +0800 Subject: [PATCH 035/247] add overlap mask option --- evaluator.py | 22 +++++++--- seg_dataloaders.py | 85 +++++++++++++++++++++++++++------------ train_instseg.py | 21 ++++++---- utils/loggers/__init__.py | 13 ++---- utils/plots.py | 17 +++++--- utils/seg_loss.py | 40 +++++++++++++----- 6 files changed, 131 insertions(+), 67 deletions(-) diff --git a/evaluator.py b/evaluator.py index 5d92244af5c0..3b20f4971635 100644 --- a/evaluator.py +++ b/evaluator.py @@ -64,7 +64,7 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): class Yolov5Evaluator: def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - max_plot_dets=10, mask=False, mask_downsample_ratio=1, ) -> None: + max_plot_dets=10, mask=False, mask_downsample_ratio=1, overlap=False) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ 
-82,6 +82,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio + self.overlap = overlap self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 @@ -130,7 +131,7 @@ def run_training(self, model, dataloader, compute_loss=None): img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks, compute_loss) # Statistics per image @@ -139,7 +140,8 @@ def run_training(self, model, dataloader, compute_loss=None): # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None @@ -181,7 +183,7 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_ img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks) # Statistics per image @@ -193,7 +195,8 @@ def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_ # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None @@ -390,6 +393,13 @@ def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) + # convert masks (1, 640, 640) -> (n, 640, 640) + if self.overlap: + nl = len(labels) + index = torch.arange(nl, device=gt_masksi.device).view(nl, 1, 1) + 1 + gt_masksi = gt_masksi.repeat(nl, 1, 1) + gt_masksi = torch.where(gt_masksi == index, 1.0, 0.0) + if gt_masksi.shape[1:] != pred_maski.shape[1:]: gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", align_corners=False, ).squeeze(0) @@ -462,7 +472,7 @@ def plot_images(self, i, img, targets, masks, out, paths): if masks is not None and masks.shape[1:] != img.shape[2:]: masks = F.interpolate( - masks.unsqueeze(0), + masks.unsqueeze(0).float(), img.shape[2:], mode="bilinear", align_corners=False, diff --git a/seg_dataloaders.py b/seg_dataloaders.py index 32f3e0af7127..ac6da36fab09 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -61,7 +61,7 @@ def __iter__(self): def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, neg_dir="", - bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, ): + bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): if rect and shuffle: print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") shuffle = False @@ -75,6 +75,7 @@ 
def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non prefix=prefix, neg_dir=neg_dir, bg_dir=bg_dir, area_thr=area_thr, ) if mask_head: dataset.downsample_ratio = mask_downsample_ratio + dataset.overlap = overlap_mask batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers @@ -454,11 +455,12 @@ def collate_fn4(batch): class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, - downsample_ratio=1, # return dowmsample mask + downsample_ratio=1, overlap=False, ): super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, stride, pad, prefix, neg_dir, bg_dir, area_thr, ) self.downsample_ratio = downsample_ratio + self.overlap = overlap @Dataset.mosaic_getitem def __getitem__(self, index): @@ -506,13 +508,18 @@ def __getitem__(self, index): nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) - for si in range(len(segments)): - mask = polygon2mask_downsample(img.shape[:2], [segments[si].reshape(-1)], - downsample_ratio=self.downsample_ratio, ) - masks.append(torch.from_numpy(mask.astype(np.float32))) + if self.overlap: + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, + downsample_ratio=self.downsample_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + labels = labels[sorted_idx] + else: + masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) - masks = (torch.stack(masks, axis=0) if len(masks) else torch.zeros(nl, img.shape[0] // self.downsample_ratio, - img.shape[1] // self.downsample_ratio)) + masks = (torch.from_numpy(masks) if len(masks) else + torch.zeros(1 if self.overlap else nl, + img.shape[0] // self.downsample_ratio, + img.shape[1] // self.downsample_ratio)) # TODO: albumentations support if self.augment: # Albumentations @@ -930,23 +937,6 @@ def exif_transpose(image): return image -def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): - """ - Args: - img_size (tuple): The image size. - polygons (np.ndarray): [N, M], N is the number of polygons, - M is the number of points(Be divided by 2). - """ - img_size = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) - mask = np.zeros(img_size, dtype=np.uint8) - polygons = np.asarray(polygons) / downsample_ratio - polygons = polygons.astype(np.int32) - shape = polygons.shape - polygons = polygons.reshape(shape[0], -1, 2) - cv2.fillPoly(mask, polygons, color=color) - return mask - - def worker_init_reset_seed(worker_id): seed = uuid.uuid4().int % 2 ** 32 random.seed(seed) @@ -954,7 +944,7 @@ def worker_init_reset_seed(worker_id): np.random.seed(seed) -def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1): +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): """ Args: img_size (tuple): The image size. 
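+        Example (editor's sketch, illustrative values): rasterize one flattened
+        [x1, y1, x2, y2, ...] polygon at half resolution:
+            polygon2mask((640, 640), [np.array([10., 10., 300., 10., 150., 200.])],
+                         color=1, downsample_ratio=2)  # -> (320, 320) uint8 mask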
@@ -968,10 +958,53 @@ def polygon2mask_downsample(img_size, polygons, color=1, downsample_ratio=1):
     polygons = polygons.reshape(shape[0], -1, 2)
     cv2.fillPoly(mask, polygons, color=color)
     nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
+    # NOTE: fillPoly at full resolution followed by a resize keeps the loss
+    # computed the same way as when mask-ratio=1.
     mask = cv2.resize(mask, (nw, nh))
     return mask
 
 
+def polygons2masks(img_size, polygons, color, downsample_ratio=1):
+    """
+    Args:
+        img_size (tuple): The image size.
+        polygons (list[np.ndarray]): a list of polygons, one per instance; each
+            polygon is an array of points that is flattened to
+            [x1, y1, x2, y2, ...] before rasterization.
+    """
+    masks = []
+    for si in range(len(polygons)):
+        mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color,
+                            downsample_ratio)
+        masks.append(mask)
+    return np.array(masks)
+
+
+def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
+    """Return a single (h // downsample_ratio, w // downsample_ratio) mask in which
+    each instance is encoded as a distinct 1-based integer id; instances are drawn
+    in decreasing-area order, so smaller instances stay on top."""
+    masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
+                     dtype=np.uint8)
+    areas = []
+    ms = []
+    for si in range(len(segments)):
+        mask = polygon2mask(
+            img_size,
+            [segments[si].reshape(-1)],
+            downsample_ratio=downsample_ratio,
+            color=1,
+        )
+        ms.append(mask)
+        areas.append(mask.sum())
+    areas = np.asarray(areas)
+    index = np.argsort(-areas)
+    ms = np.array(ms)[index]
+    for i in range(len(segments)):
+        mask = ms[i] * (i + 1)
+        masks = masks + mask
+        masks = np.clip(masks, a_min=0, a_max=i + 1)
+    return masks, index
+
+
 def img2label_paths(img_paths):
     # Define label paths as a function of image paths
     sa, sb = (os.sep + "images" + os.sep, os.sep + "labels" + os.sep,)  # /images/, /labels/ substrings
diff --git a/train_instseg.py b/train_instseg.py
index 304b86282fa8..29e9eb8c9536 100644
--- a/train_instseg.py
+++ b/train_instseg.py
@@ -78,7 +78,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     # Directories
     w = save_dir / 'weights'  # weights dir
     (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
-    last, best, last_mosiac = w / 'last.pt', w / 'best.pt', w / "last_mosaic.pt"
+    last, best = w / 'last.pt', w / 'best.pt'
 
     # Hyperparameters
     if isinstance(hyp, str):
@@ -107,6 +107,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
 
     # Config
     plots = not evolve and not opt.noplots  # create plots
+    overlap = opt.overlap_mask
     cuda = device.type != 'cpu'
     init_seeds(opt.seed + 1 + RANK, True)
     with torch_distributed_zero_first(LOCAL_RANK):
@@ -163,7 +164,8 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                                mask=True,
                                verbose=False,
                                mask_downsample_ratio=mask_ratio,
-                               plots=plots
+                               plots=plots,
+                               overlap=overlap
                                )
     g = [], [], []  # optimizer parameter groups
     bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. 
BatchNorm2d()
@@ -249,7 +251,8 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                                           prefix=colorstr('train: '),
                                           mask_head=True,
                                           shuffle=True,
-                                          mask_downsample_ratio=mask_ratio
+                                          mask_downsample_ratio=mask_ratio,
+                                          overlap_mask=overlap,
                                           )
     mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
     print("mlc , nc ", mlc, " ", nc )
@@ -271,6 +274,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                                      pad=0.5,
                                      mask_head=True,
                                      mask_downsample_ratio=mask_ratio,
+                                     overlap_mask=overlap,
                                      prefix=colorstr('val: '))[0]
 
     if not resume:
@@ -316,7 +320,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     scheduler.last_epoch = start_epoch - 1  # do not move
     scaler = torch.cuda.amp.GradScaler(enabled=amp)
     stopper, stop = EarlyStopping(patience=opt.patience), False
-    compute_loss = ComputeLoss(model)  # init loss class
+    compute_loss = ComputeLoss(model, overlap=overlap)  # init loss class
     callbacks.run('on_train_start')
     LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                 f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
@@ -371,7 +375,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
             # Forward
             with torch.cuda.amp.autocast(amp):
                 pred = model(imgs)  # forward
-                loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device))  # loss scaled by batch_size
+                loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float())  # loss scaled by batch_size
                 if RANK != -1:
                     loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                 if opt.quad:
@@ -398,12 +402,12 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                 # for plots
                 if mask_ratio != 1:
                     masks = F.interpolate(
-                        masks[None, :],
+                        masks[None, :].float(),
                         (imgsz, imgsz),
                         mode="bilinear",
                         align_corners=False,
                     ).squeeze(0)
-                callbacks.run('on_train_batch_end', ni, model, imgs, targets,masks, paths, plots, opt.sync_bn, None)
+                callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots, opt.sync_bn)
                 if callbacks.stop_training:
                     return
@@ -525,7 +529,8 @@ def parse_opt(known=False):
     parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
     parser.add_argument('--seed', type=int, default=0, help='Global training seed')
    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-    parser.add_argument('--mask-ratio', type=int, default=1, help='mask ratio')
+    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
+    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of a slight accuracy decrease')
 
     # Weights & Biases arguments
     parser.add_argument('--entity', default=None, help='W&B: Entity')
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index c80c8077baca..a142f607561e 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -377,9 +377,7 @@ def __init__(
         ]
 
-    def on_train_batch_end(
-        self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx
-    ):
+    def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots, sync_bn):
         # Callback runs on train batch end
         if plots and self.save_dir.exists():
             if ni == 0:
@@ -394,14 +392,9 @@ def on_train_batch_end(
                 ),
                 [],
             )
-            if plot_idx is not None and ni in plot_idx:
-                # if ni < 3:
+            if ni < 3:
+                f = 
self.save_dir / f"train_batch{ni}.jpg" # filename - Thread( - target=plot_images_and_masks, - args=(imgs, targets, masks, paths, f), - daemon=True, - ).start() + plot_images_and_masks(imgs, targets, masks, paths, f) if self.wandb: wandb.log({"train_labels": wandb.Image(str(f))}) diff --git a/utils/plots.py b/utils/plots.py index f5cd3578929d..11a0c859e152 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -1235,12 +1235,17 @@ def plot_images_and_masks( if len(targets) > 0: idx = (targets[:, 0]).astype(int) image_targets = targets[idx == i] - # print(targets.shape) - # print(masks.shape) - image_masks = masks[idx == i] - # mosaic_masks - # mosaic_masks[block_y:block_y + h, - # block_x:block_x + w, :] = image_masks + + if masks.max() > 1.0: # mean that masks are overlap + image_masks = masks[[i]] # (1, 640, 640) + # convert masks (1, 640, 640) -> (n, 640, 640) + nl = len(image_targets) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[idx == i] + boxes = xywh2xyxy(image_targets[:, 2:6]).T classes = image_targets[:, 1].astype("int") labels = image_targets.shape[1] == 6 # labels if no conf column diff --git a/utils/seg_loss.py b/utils/seg_loss.py index e74b82adae7b..e5294a5300f7 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -12,8 +12,9 @@ class ComputeLoss: # Compute losses - def __init__(self, model, autobalance=False): + def __init__(self, model, autobalance=False, overlap=False): self.sort_obj_iou = False + self.overlap = overlap device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters @@ -141,8 +142,7 @@ def loss_segment(self, preds, targets, masks): lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression - mask_gt = masks[tidxs[i]] - downsampled_masks = F.interpolate(mask_gt[None, :], (mask_h, mask_w), mode="bilinear", + downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), mode="bilinear", align_corners=False, ).squeeze(0) mxywh = xywh[i] @@ -155,8 +155,15 @@ def loss_segment(self, preds, targets, masks): batch_lseg = torch.zeros(1, device=device) for bi in b.unique(): index = b == bi - mask_gti = downsampled_masks[index] - mask_gti = mask_gti.permute(1, 2, 0).contiguous() + if self.overlap: + mask_index = tidxs[i][index] + # h, w, n + mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) + # h, w, n + mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) + else: + mask_gti = downsampled_masks[tidxs[i]][index] + mask_gti = mask_gti.permute(1, 2, 0).contiguous() mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] @@ -191,10 +198,6 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): lseg = lseg.mean(dim=(0, 1)) / w / h return lseg.mean() - def mask_loss(self, gt_masks, preds, protos, xyxys, ws, hs): - """mask loss of batches.""" - pass - def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets @@ -257,8 +260,23 @@ def build_targets_for_masks(self, p, targets): gain = torch.ones(8, device=targets.device) # normalized to gridspace gain ai = ( torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) - ti = ( - torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) + if self.overlap: + batch = p[0].shape[0] + ti = [] + for i in range(batch): + 
# find number of targets of each image + num = (targets[:, 0] == i).sum() + # (na, num) + ti.append( + torch.arange(num, device=targets.device) + .float() + .view(1, num) + .repeat(na, 1) + 1) + # (na, nt) + ti = torch.cat(ti, 1) + else: + ti = ( + torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices From efa5dcf567be3f4cae637f8d30e7be4df47b60e0 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Tue, 19 Jul 2022 17:12:37 +0800 Subject: [PATCH 036/247] fix mAP issue --- models/yolo.py | 2 +- train_instseg.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index e6860a9d7435..245d65453828 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -131,7 +131,7 @@ def forward(self, x): y[..., 0:5] = y[..., 0:5].sigmoid() y[..., self.nm:] = y[..., self.nm:].sigmoid() if self.inplace: - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy + y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy diff --git a/train_instseg.py b/train_instseg.py index 29e9eb8c9536..b1ea72ff5757 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -316,7 +316,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False From e260a10f906e032d9b3376474020581c4fb10a25 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 18:42:55 +0530 Subject: [PATCH 037/247] restore evaluate.py --- evaluate.py | 810 +++++++++++++++++++++++++++++++++++++++++++++++ train_instseg.py | 2 +- 2 files changed, 811 insertions(+), 1 deletion(-) create mode 100644 evaluate.py diff --git a/evaluate.py b/evaluate.py new file mode 100644 index 000000000000..24f2e40b95db --- /dev/null +++ b/evaluate.py @@ -0,0 +1,810 @@ + +import json +from pathlib import Path +from threading import Thread + +import numpy as np +import torch +import torch.nn.functional as F +# import pycocotools.mask as mask_util +from tqdm import tqdm + +from models.experimental import attempt_load +from seg_dataloaders import create_dataloader +from utils.general import ( + coco80_to_coco91_class, + increment_path, + colorstr, check_dataset, check_img_size, check_suffix +) + +from utils.segment import ( + non_max_suppression_masks, + mask_iou, + process_mask, + process_mask_upsample, + scale_masks, +) +from utils.boxes import ( + box_iou, + non_max_suppression, + scale_coords, + xyxy2xywh, + xywh2xyxy, +) +from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.seg_plots import output_to_target, plot_images_boxes_and_masks +from utils.torch_utils import select_device, time_sync + + +def save_one_txt(predn, save_conf, shape, file): + # Save one txt 
result + gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh + for *xyxy, conf, cls in predn.tolist(): + xywh = ( + (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() + ) # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(file, "a") as f: + f.write(("%g " * len(line)).rstrip() % line + "\n") + + +def save_one_json(predn, jdict, path, class_map, pred_masks=None): + # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + image_id = int(path.stem) if path.stem.isnumeric() else path.stem + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + + if pred_masks is not None: + pred_masks = np.transpose(pred_masks, (2, 0, 1)) + rles = [ + mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in pred_masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): + pred_dict = { + "image_id": image_id, + "category_id": class_map[int(p[5])], + "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), + } + if pred_masks is not None: + pred_dict["segmentation"] = rles[i] + jdict.append(pred_dict) + + +@torch.no_grad() +class Yolov5Evaluator: + def __init__( + self, + data, + conf_thres=0.001, + iou_thres=0.6, + device="", + single_cls=False, + augment=False, + verbose=False, + project="runs/val", + name="exp", + exist_ok=False, + half=True, + save_dir=Path(""), + nosave=False, + plots=True, + mask=False, + mask_downsample_ratio=1, + ) -> None: + self.data = check_dataset(data) # check + self.conf_thres = conf_thres # confidence threshold + self.iou_thres = iou_thres # NMS IoU threshold + self.device = device # cuda device, i.e. 
0 or 0,1,2,3 or cpu + self.single_cls = single_cls # treat as single-class dataset + self.augment = augment # augmented inference + self.verbose = verbose # verbose output + self.project = project # save to project/name + self.name = name # save to project/name + self.exist_ok = exist_ok # existing project/name ok, do not increment + self.half = half # use FP16 half-precision inference + self.save_dir = save_dir + self.nosave = nosave + self.plots = plots + self.mask = mask + self.mask_downsample_ratio = mask_downsample_ratio + + self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes + self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 + self.niou = self.iouv.numel() + self.confusion_matrix = ConfusionMatrix(nc=self.nc) + self.dt = [0.0, 0.0, 0.0] + self.names = {k: v for k, v in enumerate(self.data["names"])} + self.s = ( + ("%20s" + "%11s" * 10) + % ( + "Class", + "Images", + "Labels", + "Box:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + "Mask:{P", + "R", + "mAP@.5", + "mAP@.5:.95}", + ) + if self.mask + else ("%20s" + "%11s" * 6) + % ( + "Class", + "Images", + "Labels", + "P", + "R", + "mAP@.5", + "mAP@.5:.95", + ) + ) + + # coco stuff + self.is_coco = isinstance(self.data.get("val"), str) and self.data[ + "val" + ].endswith( + "coco/val2017.txt" + ) # COCO dataset + self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.jdict = [] + self.iou_thres = 0.65 if self.is_coco else self.iou_thres + + # masks stuff + self.pred_masks = [] # for mask visualization + + # metric stuff + self.seen = 0 + self.stats = [] + self.total_loss = torch.zeros((4 if self.mask else 3)) + self.metric = Metrics() if self.mask else Metric() + + def run_training(self, model, dataloader, compute_loss=None): + """This is for evaluation when training.""" + self.seen = 0 + self.device = next(model.parameters()).device # get model device + # self.iouv.to(self.device) + self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device) + self.half &= self.device.type != "cpu" # half precision only supported on CUDA + model.half() if self.half else model.float() + # Configure + model.eval() + + # inference + # masks will be `None` if training objection. + for batch_i, (img, targets, paths, shapes, masks) in enumerate( + tqdm(dataloader, desc=self.s) + ): + # reset pred_masks + self.pred_masks = [] + img = img.to(self.device, non_blocking=True) + targets = targets.to(self.device) + if masks is not None: + masks = masks.to(self.device) + out, train_out = self.inference(model, img, targets, masks, compute_loss) + + # Statistics per image + for si, pred in enumerate(out): + self.seen += 1 + + # eval in every image level + labels = targets[targets[:, 0] == si, 1:] + gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + + # get predition masks + proto_out = train_out[1][si] if isinstance(train_out, tuple) else None + pred_maski = self.get_predmasks( + pred, + proto_out, + gt_masksi.shape[1:] if gt_masksi is not None else None, + ) + + # for visualization + if self.plots and batch_i < 3 and pred_maski is not None: + self.pred_masks.append(pred_maski.cpu()) + + # NOTE: eval in training image-size space + self.compute_stat(pred, pred_maski, labels, gt_masksi) + + if self.plots and batch_i < 3: + self.plot_images(batch_i, img, targets, masks, out, paths) + + # compute map and print it. 
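+        # (after_infer also resets self.stats and returns per-image speeds in ms:
+        # pre-process, inference, NMS)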
+        t = self.after_infer()
+
+        # Return results
+        model.float()  # for training
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def run(
+        self,
+        weights,
+        batch_size,
+        imgsz,
+        save_txt=False,
+        save_conf=False,
+        save_json=False,
+        task="val",
+    ):
+        """This is for native evaluation."""
+        model, dataloader, imgsz = self.before_infer(
+            weights, batch_size, imgsz, save_txt, task
+        )
+        self.seen = 0
+        # self.iouv.to(self.device)
+        self.half &= self.device.type != "cpu"  # half precision only supported on CUDA
+        model.half() if self.half else model.float()
+        # Configure
+        model.eval()
+
+        # inference
+        for batch_i, (img, targets, paths, shapes, masks) in enumerate(
+            tqdm(dataloader, desc=self.s)
+        ):
+            # reset pred_masks
+            self.pred_masks = []
+            img = img.to(self.device, non_blocking=True)
+            targets = targets.to(self.device)
+            if masks is not None:
+                masks = masks.to(self.device)
+            out, train_out = self.inference(model, img, targets, masks)
+
+            # Statistics per image
+            for si, pred in enumerate(out):
+                self.seen += 1
+                path = Path(paths[si])
+                shape = shapes[si][0]
+                ratio_pad = shapes[si][1]
+
+                # eval in every image level
+                labels = targets[targets[:, 0] == si, 1:]
+                gt_masksi = masks[targets[:, 0] == si] if masks is not None else None
+
+                # get prediction masks
+                proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
+                pred_maski = self.get_predmasks(
+                    pred,
+                    proto_out,
+                    gt_masksi.shape[1:] if gt_masksi is not None else None,
+                )
+
+                # for visualization
+                if self.plots and batch_i < 3 and pred_maski is not None:
+                    self.pred_masks.append(pred_maski.cpu())
+
+                # NOTE: eval in training image-size space
+                self.compute_stat(pred, pred_maski, labels, gt_masksi)
+
+                # no predictions, nothing to save
+                if len(pred) == 0:
+                    continue
+
+                if save_txt or save_json:
+                    # clone() so that plot_images still works correctly
+                    predn = pred.clone()
+                    # test-time loading adds 0.5 padding, which differs from the
+                    # training dataloader's padding, so ratio_pad must be passed in
+                    scale_coords(
+                        img[si].shape[1:], predn[:, :4], shape, ratio_pad
+                    )  # native-space pred
+                # Save/log
+                if save_txt and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving txt;
+                    # only box predictions are saved
+                    save_one_txt(
+                        predn,
+                        save_conf,
+                        shape,
+                        file=self.save_dir / "labels" / (path.stem + ".txt"),
+                    )
+                if save_json and self.save_dir.exists():
+                    # NOTE: convert coords to native space when saving json.
+                    # if pred_maski is not None:
+                    # h, w, n
+                    pred_maski = scale_masks(
+                        img[si].shape[1:],
+                        pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
+                        shape,
+                        ratio_pad,
+                    )
+                    save_one_json(
+                        predn,
+                        self.jdict,
+                        path,
+                        self.class_map,
+                        pred_maski,
+                    )  # append to COCO-JSON dictionary
+
+            if self.plots and batch_i < 3:
+                self.plot_images(batch_i, img, targets, masks, out, paths)
+
+        # compute mAP and print it.
+        t = self.after_infer()
+
+        # save json
+        if self.save_dir.exists() and save_json:
+            pred_json = str(self.save_dir / "predictions.json")  # predictions json
+            print(f"\nEvaluating pycocotools mAP... 
+            with open(pred_json, "w") as f:
+                json.dump(self.jdict, f)
+
+        # Print speeds
+        shape = (batch_size, 3, imgsz, imgsz)
+        print(
+            f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}"
+            % t
+        )
+
+        s = (
+            f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}"
+            if save_txt and self.save_dir.exists()
+            else ""
+        )
+        print(
+            f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}"
+        )
+
+        # Return results
+        return (
+            (
+                *self.metric.mean_results(),
+                *(self.total_loss.cpu() / len(dataloader)).tolist(),
+            ),
+            self.metric.get_maps(self.nc),
+            t,
+        )
+
+    def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"):
+        """Prepare model and dataloader for evaluation outside of training."""
+        self.device = select_device(self.device, batch_size=batch_size)
+
+        # Directories
+        self.save_dir = increment_path(
+            Path(self.project) / self.name, exist_ok=self.exist_ok
+        )  # increment run
+        if not self.nosave:
+            (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(
+                parents=True, exist_ok=True
+            )  # make dir
+
+        # Load model
+        check_suffix(weights, ".pt")
+        model = attempt_load(weights, map_location=self.device)  # load FP32 model
+        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
+        imgsz = check_img_size(imgsz, s=gs)  # check image size
+
+        # Data
+        if self.device.type != "cpu":
+            model(
+                torch.zeros(1, 3, imgsz, imgsz)
+                .to(self.device)
+                .type_as(next(model.parameters()))
+            )  # run once
+        pad = 0.0 if task == "speed" else 0.5
+        task = (
+            task if task in ("train", "val", "test") else "val"
+        )  # path to train/val/test images
+        dataloader = create_dataloader(
+            self.data[task],
+            imgsz,
+            batch_size,
+            gs,
+            self.single_cls,
+            pad=pad,
+            rect=True,
+            prefix=colorstr(f"{task}: "),
+            mask_head=self.mask,
+            mask_downsample_ratio=self.mask_downsample_ratio,
+        )[0]
+        return model, dataloader, imgsz
+
+    def inference(self, model, img, targets, masks=None, compute_loss=None):
+        """Inference"""
+        t1 = time_sync()
+        img = img.half() if self.half else img.float()  # uint8 to fp16/32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        _, _, height, width = img.shape  # batch size, channels, height, width
+        t2 = time_sync()
+        self.dt[0] += t2 - t1
+
+        # Run model
+        out, train_out = model(
+            img, augment=self.augment
+        )  # inference and training outputs
+        self.dt[1] += time_sync() - t2
+
+        # Compute loss
+        if compute_loss:
+            self.total_loss += compute_loss(train_out, targets, masks)[
+                1
+            ]  # box, obj, cls
+
+        # Run NMS
+        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(
+            self.device
+        )  # to pixels
+        t3 = time_sync()
+        out = self.nms(
+            prediction=out,
+            conf_thres=self.conf_thres,
+            iou_thres=self.iou_thres,
+            multi_label=True,
+            agnostic=self.single_cls,
+        )
+        self.dt[2] += time_sync() - t3
+        return out, train_out
+
+    def after_infer(self):
+        """Post-inference bookkeeping: plot what was collected and compute metrics.
+        Return:
+            t (tuple): per-image speeds in ms.
+        """
+        # Plot confusion matrix
+        if self.plots and self.save_dir.exists():
+            self.confusion_matrix.plot(
+                save_dir=self.save_dir, names=list(self.names.values())
+            )
+
+        # Compute statistics
+        stats = [np.concatenate(x, 0) for x in zip(*self.stats)]  # to numpy
+        box_or_mask_any = stats[0].any() or stats[1].any()
+        stats = stats[1:] if not self.mask else stats
+        if len(stats) and box_or_mask_any:
+            results = self.ap_per_class(
+                *stats,
+                self.plots,
+                self.save_dir if self.save_dir.exists() else None,
+                self.names,
+            )
+            self.metric.update(results)
+            nt = np.bincount(
+                stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc
+            )  # number of targets per class
+        else:
+            nt = torch.zeros(1)
+
+        # reset stats; keeping them on `self` lets run() and run_training() share this code.
+        self.stats = []
+        # print information
+        self.print_metric(nt, stats)
+        t = tuple(x / self.seen * 1e3 for x in self.dt)  # speeds per image
+        return t
+
+    def process_batch(self, detections, labels, iouv):
+        """
+        Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+        Arguments:
+            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+            labels (Array[M, 5]), class, x1, y1, x2, y2
+        Returns:
+            correct (Array[N, 10]), for 10 IoU levels
+        """
+        correct = torch.zeros(
+            detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device
+        )
+        iou = box_iou(labels[:, 1:], detections[:, :4])
+        x = torch.where(
+            (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])
+        )  # IoU above threshold and classes match
+        if x[0].shape[0]:
+            matches = (
+                torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
+                .cpu()
+                .numpy()
+            )  # [label, detection, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            matches = torch.Tensor(matches).to(iouv.device)
+            correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+        return correct
+
+    def get_predmasks(self, pred, proto_out, gt_shape):
+        """Get prediction masks in one of two ways.
+        1. process_mask, for val during training: evaluates a low-quality mask
+           (1/mask_ratio of the original size) to save CUDA memory.
+        2. process_mask_upsample, for val after training: produces a high-quality,
+           original-size mask.
+
+        Args:
+            pred(torch.Tensor): output of network, (N, 5 + mask_dim + class).
+            proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w).
+            gt_shape(tuple): shape of the gt mask, which may differ from the network
+                input size because of mask_downsample_ratio.
+        Return:
+            pred_mask(torch.Tensor): final predicted masks at input-image size,
+                (N, input_h, input_w).
+        """
+        if proto_out is None or len(pred) == 0:
+            return None
+        process = process_mask_upsample if self.plots else process_mask
+        gt_shape = (
+            gt_shape[0] * self.mask_downsample_ratio,
+            gt_shape[1] * self.mask_downsample_ratio,
+        )
+        # n, h, w
+        pred_mask = (
+            process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape)
+            .permute(2, 0, 1)
+            .contiguous()
+        )
+        return pred_mask
+
+    def process_batch_masks(self, predn, pred_maski, gt_masksi, labels):
+        assert not (
+            (pred_maski is None) ^ (gt_masksi is None)
+        ), "`pred_maski` and `gt_masksi` should both be None or both exist."
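+        # For reference, mask_iou below works on masks flattened to (n, h*w); for
+        # binary masks a and b it reduces to intersection over union. A minimal
+        # sketch with made-up tensors:
+        #   a = torch.tensor([[1, 1, 0, 0]]); b = torch.tensor([[0, 1, 1, 0]])
+        #   inter = (a * b).sum(1); union = a.sum(1) + b.sum(1) - inter  # IoU = 1/3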
+        if pred_maski is None and gt_masksi is None:
+            return torch.zeros(0, self.niou, dtype=torch.bool)
+
+        correct = torch.zeros(
+            predn.shape[0],
+            self.iouv.shape[0],
+            dtype=torch.bool,
+            device=self.iouv.device,
+        )
+
+        if not self.plots:
+            gt_masksi = F.interpolate(
+                gt_masksi.unsqueeze(0),
+                pred_maski.shape[1:],
+                mode="bilinear",
+                align_corners=False,
+            ).squeeze(0)
+
+        iou = mask_iou(
+            gt_masksi.view(gt_masksi.shape[0], -1),
+            pred_maski.view(pred_maski.shape[0], -1),
+        )
+        x = torch.where(
+            (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])
+        )  # IoU above threshold and classes match
+        if x[0].shape[0]:
+            matches = (
+                torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
+                .cpu()
+                .numpy()
+            )  # [label, detection, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            matches = torch.Tensor(matches).to(self.iouv.device)
+            correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv
+        return correct
+
+    def compute_stat(self, predn, pred_maski, labels, gt_maski):
+        """Compute correctness statistics from IoUs, with boxes in training image-size space."""
+        nl = len(labels)
+        tcls = labels[:, 0].tolist() if nl else []  # target class
+
+        if len(predn) == 0:
+            if nl:
+                self.stats.append(
+                    (
+                        torch.zeros(0, self.niou, dtype=torch.bool),  # boxes
+                        torch.zeros(0, self.niou, dtype=torch.bool),  # masks
+                        torch.Tensor(),
+                        torch.Tensor(),
+                        tcls,
+                    )
+                )
+            return
+
+        # Predictions
+        if self.single_cls:
+            predn[:, 5] = 0
+
+        # Evaluate
+        if nl:
+            tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+            labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+            # boxes
+            correct_boxes = self.process_batch(predn, labelsn, self.iouv)
+
+            # masks
+            correct_masks = self.process_batch_masks(
+                predn, pred_maski, gt_maski, labelsn
+            )
+
+            if self.plots:
+                self.confusion_matrix.process_batch(predn, labelsn)
+        else:
+            correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
+            correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
+        self.stats.append(
+            (
+                correct_masks.cpu(),
+                correct_boxes.cpu(),
+                predn[:, 4].cpu(),
+                predn[:, 5].cpu(),
+                tcls,
+            )
+        )  # (correct, conf, pcls, tcls)
+
+    def print_metric(self, nt, stats):
+        # Print results
+        pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4)
+        print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results()))
+
+        # Print results per class
+        # TODO: make self.seen support verbose (per-class image counts).
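+        # Illustrative shape of one printed row with mask=True (numbers invented):
+        #                  all        128        929      0.724      0.637      0.685      0.452      0.698      0.611      0.655      0.388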
+        if self.verbose and self.nc > 1 and len(stats):
+            for i, c in enumerate(self.metric.ap_class_index):
+                print(
+                    pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))
+                )
+
+    def plot_images(self, i, img, targets, masks, out, paths):
+        if not self.save_dir.exists():
+            return
+        # plot ground truth
+        f = self.save_dir / f"val_batch{i}_labels.jpg"  # labels
+
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])),
+            daemon=True,
+        ).start()
+        f = self.save_dir / f"val_batch{i}_pred.jpg"  # predictions
+
+        # plot predictions
+        if len(self.pred_masks):
+            pred_masks = (
+                torch.cat(self.pred_masks, dim=0)
+                if len(self.pred_masks) > 1
+                else self.pred_masks[0]
+            )
+        else:
+            pred_masks = None
+        Thread(
+            target=plot_images_boxes_and_masks,
+            args=(
+                img,
+                output_to_target(out),
+                pred_masks,
+                paths,
+                f,
+                self.names,
+                max(img.shape[2:]),
+            ),
+            daemon=True,
+        ).start()
+
+    def nms(self, **kwargs):
+        return (
+            non_max_suppression_masks(**kwargs)
+            if self.mask
+            else non_max_suppression(**kwargs)
+        )
+
+    def ap_per_class(self, *args):
+        return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args)
+
+
+class Metric:
+    def __init__(self) -> None:
+        self.p = []  # (nc, )
+        self.r = []  # (nc, )
+        self.f1 = []  # (nc, )
+        self.all_ap = []  # (nc, 10)
+        self.ap_class_index = []  # (nc, )
+
+    @property
+    def ap50(self):
+        """AP@0.5 of all classes.
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap[:, 0] if len(self.all_ap) else []
+
+    @property
+    def ap(self):
+        """AP@0.5:0.95 of all classes.
+        Return:
+            (nc, ) or [].
+        """
+        return self.all_ap.mean(1) if len(self.all_ap) else []
+
+    @property
+    def mp(self):
+        """Mean precision over all classes.
+        Return:
+            float.
+        """
+        return self.p.mean() if len(self.p) else 0.0
+
+    @property
+    def mr(self):
+        """Mean recall over all classes.
+        Return:
+            float.
+        """
+        return self.r.mean() if len(self.r) else 0.0
+
+    @property
+    def map50(self):
+        """Mean AP@0.5 over all classes.
+        Return:
+            float.
+        """
+        return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
+
+    @property
+    def map(self):
+        """Mean AP@0.5:0.95 over all classes.
+        Return:
+            float.
+ """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index diff --git a/train_instseg.py b/train_instseg.py index f5ef6b15a580..b3c699c182e9 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -66,7 +66,7 @@ from torch.optim import AdamW import yaml from datetime import datetime -from eval_seg import Yolov5Evaluator +from evaluate import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) From 0a5944a8aea1cff40d9379b7e935bfd4bc61f9cc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 18:43:55 +0530 Subject: [PATCH 038/247] restore evaluator --- evaluate.py | 810 ------------------------------------ eval_seg.py => evaluator.py | 0 train_instseg.py | 2 +- 3 files changed, 1 insertion(+), 811 deletions(-) delete mode 100644 evaluate.py rename eval_seg.py => evaluator.py (100%) diff --git a/evaluate.py b/evaluate.py deleted file mode 100644 index 24f2e40b95db..000000000000 --- a/evaluate.py +++ /dev/null @@ -1,810 +0,0 @@ - -import json -from pathlib import Path -from threading import Thread - -import numpy as np -import torch -import torch.nn.functional as F -# import pycocotools.mask as mask_util -from tqdm import tqdm - -from models.experimental import attempt_load -from seg_dataloaders import create_dataloader -from utils.general import ( - coco80_to_coco91_class, - increment_path, - colorstr, check_dataset, check_img_size, check_suffix -) - -from utils.segment import ( - non_max_suppression_masks, - mask_iou, - process_mask, - process_mask_upsample, - scale_masks, -) -from utils.boxes import ( - box_iou, - non_max_suppression, - scale_coords, - xyxy2xywh, - xywh2xyxy, -) -from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.seg_plots import output_to_target, plot_images_boxes_and_masks -from utils.torch_utils import select_device, time_sync - - -def save_one_txt(predn, save_conf, shape, file): - # Save one txt result - gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh - for *xyxy, conf, cls in 
predn.tolist(): - xywh = ( - (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() - ) # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(file, "a") as f: - f.write(("%g " * len(line)).rstrip() % line + "\n") - - -def save_one_json(predn, jdict, path, class_map, pred_masks=None): - # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = xyxy2xywh(predn[:, :4]) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - - if pred_masks is not None: - pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [ - mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in pred_masks - ] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") - - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { - "image_id": image_id, - "category_id": class_map[int(p[5])], - "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), - } - if pred_masks is not None: - pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) - - -@torch.no_grad() -class Yolov5Evaluator: - def __init__( - self, - data, - conf_thres=0.001, - iou_thres=0.6, - device="", - single_cls=False, - augment=False, - verbose=False, - project="runs/val", - name="exp", - exist_ok=False, - half=True, - save_dir=Path(""), - nosave=False, - plots=True, - mask=False, - mask_downsample_ratio=1, - ) -> None: - self.data = check_dataset(data) # check - self.conf_thres = conf_thres # confidence threshold - self.iou_thres = iou_thres # NMS IoU threshold - self.device = device # cuda device, i.e. 0 or 0,1,2,3 or cpu - self.single_cls = single_cls # treat as single-class dataset - self.augment = augment # augmented inference - self.verbose = verbose # verbose output - self.project = project # save to project/name - self.name = name # save to project/name - self.exist_ok = exist_ok # existing project/name ok, do not increment - self.half = half # use FP16 half-precision inference - self.save_dir = save_dir - self.nosave = nosave - self.plots = plots - self.mask = mask - self.mask_downsample_ratio = mask_downsample_ratio - - self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes - self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 - self.niou = self.iouv.numel() - self.confusion_matrix = ConfusionMatrix(nc=self.nc) - self.dt = [0.0, 0.0, 0.0] - self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = ( - ("%20s" + "%11s" * 10) - % ( - "Class", - "Images", - "Labels", - "Box:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - "Mask:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - ) - if self.mask - else ("%20s" + "%11s" * 6) - % ( - "Class", - "Images", - "Labels", - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - ) - ) - - # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data[ - "val" - ].endswith( - "coco/val2017.txt" - ) # COCO dataset - self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) - self.jdict = [] - self.iou_thres = 0.65 if self.is_coco else self.iou_thres - - # masks stuff - self.pred_masks = [] # for mask visualization - - # metric stuff - self.seen = 0 - self.stats = [] - self.total_loss = torch.zeros((4 if self.mask else 3)) - self.metric = Metrics() if self.mask else Metric() - - def run_training(self, model, dataloader, compute_loss=None): - """This is for 
evaluation when training.""" - self.seen = 0 - self.device = next(model.parameters()).device # get model device - # self.iouv.to(self.device) - self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device) - self.half &= self.device.type != "cpu" # half precision only supported on CUDA - model.half() if self.half else model.float() - # Configure - model.eval() - - # inference - # masks will be `None` if training objection. - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): - # reset pred_masks - self.pred_masks = [] - img = img.to(self.device, non_blocking=True) - targets = targets.to(self.device) - if masks is not None: - masks = masks.to(self.device) - out, train_out = self.inference(model, img, targets, masks, compute_loss) - - # Statistics per image - for si, pred in enumerate(out): - self.seen += 1 - - # eval in every image level - labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None - - # get predition masks - proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) - - # for visualization - if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) - - # NOTE: eval in training image-size space - self.compute_stat(pred, pred_maski, labels, gt_masksi) - - if self.plots and batch_i < 3: - self.plot_images(batch_i, img, targets, masks, out, paths) - - # compute map and print it. - t = self.after_infer() - - # Return results - model.float() # for training - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) - - def run( - self, - weights, - batch_size, - imgsz, - save_txt=False, - save_conf=False, - save_json=False, - task="val", - ): - """This is for native evaluation.""" - model, dataloader, imgsz = self.before_infer( - weights, batch_size, imgsz, save_txt, task - ) - self.seen = 0 - # self.iouv.to(self.device) - self.half &= self.device.type != "cpu" # half precision only supported on CUDA - model.half() if self.half else model.float() - # Configure - model.eval() - - # inference - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): - # reset pred_masks - self.pred_masks = [] - img = img.to(self.device, non_blocking=True) - targets = targets.to(self.device) - if masks is not None: - masks = masks.to(self.device) - out, train_out = self.inference(model, img, targets, masks) - - # Statistics per image - for si, pred in enumerate(out): - self.seen += 1 - path = Path(paths[si]) - shape = shapes[si][0] - ratio_pad = shapes[si][1] - - # eval in every image level - labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None - - # get predition masks - proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) - - # for visualization - if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) - - # NOTE: eval in training image-size space - self.compute_stat(pred, pred_maski, labels, gt_masksi) - - # no preditions, not save anything - if len(pred) == 0: - continue - - if save_txt or save_json: - # clone() is for plot_images work 
correctly - predn = pred.clone() - # 因为test时添加了0.5的padding,因此这里与数据加载的padding不一致,所以需要转入ratio_pad - scale_coords( - img[si].shape[1:], predn[:, :4], shape, ratio_pad - ) # native-space pred - # Save/log - if save_txt and self.save_dir.exists(): - # NOTE: convert coords to native space when save txt. - # support save box preditions only - save_one_txt( - predn, - save_conf, - shape, - file=self.save_dir / "labels" / (path.stem + ".txt"), - ) - if save_json and self.save_dir.exists(): - # NOTE: convert coords to native space when save json. - # if pred_maski is not None: - # h, w, n - pred_maski = scale_masks( - img[si].shape[1:], - pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, - ratio_pad, - ) - save_one_json( - predn, - self.jdict, - path, - self.class_map, - pred_maski, - ) # append to COCO-JSON dictionary - - if self.plots and batch_i < 3: - self.plot_images(batch_i, img, targets, masks, out, paths) - - # compute map and print it. - t = self.after_infer() - - # save json - if self.save_dir.exists() and save_json: - pred_json = str(self.save_dir / f"predictions.json") # predictions json - print(f"\nEvaluating pycocotools mAP... saving {pred_json}...") - with open(pred_json, "w") as f: - json.dump(self.jdict, f) - - # Print speeds - shape = (batch_size, 3, imgsz, imgsz) - print( - f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" - % t - ) - - s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" - if save_txt and self.save_dir.exists() - else "" - ) - print( - f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}" - ) - - # Return results - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) - - def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): - "prepare for evaluation without training." 
- self.device = select_device(self.device, batch_size=batch_size) - - # Directories - self.save_dir = increment_path( - Path(self.project) / self.name, exist_ok=self.exist_ok - ) # increment run - if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( - parents=True, exist_ok=True - ) # make dir - - # Load model - check_suffix(weights, ".pt") - model = attempt_load(weights, map_location=self.device) # load FP32 model - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(imgsz, s=gs) # check image size - - # Data - if self.device.type != "cpu": - model( - torch.zeros(1, 3, imgsz, imgsz) - .to(self.device) - .type_as(next(model.parameters())) - ) # run once - pad = 0.0 if task == "speed" else 0.5 - task = ( - task if task in ("train", "val", "test") else "val" - ) # path to train/val/test images - dataloader = create_dataloader( - self.data[task], - imgsz, - batch_size, - gs, - self.single_cls, - pad=pad, - rect=True, - prefix=colorstr(f"{task}: "), - mask_head=self.mask, - mask_downsample_ratio=self.mask_downsample_ratio, - )[0] - return model, dataloader, imgsz - - def inference(self, model, img, targets, masks=None, compute_loss=None): - """Inference""" - t1 = time_sync() - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - _, _, height, width = img.shape # batch size, channels, height, width - t2 = time_sync() - self.dt[0] += t2 - t1 - - # Run model - out, train_out = model( - img, augment=self.augment - ) # inference and training outputs - self.dt[1] += time_sync() - t2 - - # Compute loss - if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[ - 1 - ] # box, obj, cls - - # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( - self.device - ) # to pixels - t3 = time_sync() - out = self.nms( - prediction=out, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - multi_label=True, - agnostic=self.single_cls, - ) - self.dt[2] += time_sync() - t3 - return out, train_out - - def after_infer(self): - """Do something after inference, such as plots and get metrics. - Return: - t(tuple): speeds of per image. - """ - # Plot confusion matrix - if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot( - save_dir=self.save_dir, names=list(self.names.values()) - ) - - # Compute statistics - stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy - box_or_mask_any = stats[0].any() or stats[1].any() - stats = stats[1:] if not self.mask else stats - if len(stats) and box_or_mask_any: - results = self.ap_per_class( - *stats, - self.plots, - self.save_dir if self.save_dir.exists() else None, - self.names, - ) - self.metric.update(results) - nt = np.bincount( - stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc - ) # number of targets per class - else: - nt = torch.zeros(1) - - # make this empty, cause make `stats` self is for reduce some duplicated codes. - self.stats = [] - # print information - self.print_metric(nt, stats) - t = tuple(x / self.seen * 1e3 for x in self.dt) # speeds per image - return t - - def process_batch(self, detections, labels, iouv): - """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. 
- Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - correct (Array[N, 10]), for 10 IoU levels - """ - correct = torch.zeros( - detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device - ) - iou = box_iou(labels[:, 1:], detections[:, :4]) - x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) - ) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv - return correct - - def get_predmasks(self, pred, proto_out, gt_shape): - """Get pred masks in different ways. - 1. process_mask, for val when training, eval with low quality(1/mask_ratio of original size) - mask for saving cuda memory. - 2. process_mask_upsample, for val after training to get high quality mask(original size). - - Args: - pred(torch.Tensor): output of network, (N, 5 + mask_dim + class). - proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w). - gt_shape(tuple): shape of gt mask, this shape may not equal to input size of - input image, Cause the mask_downsample_ratio. - Return: - pred_mask(torch.Tensor): predition of final masks with the same size with - input image, (N, input_h, input_w). - """ - if proto_out is None or len(pred) == 0: - return None - process = process_mask_upsample if self.plots else process_mask - gt_shape = ( - gt_shape[0] * self.mask_downsample_ratio, - gt_shape[1] * self.mask_downsample_ratio, - ) - # n, h, w - pred_mask = ( - process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) - .permute(2, 0, 1) - .contiguous() - ) - return pred_mask - - def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ( - (pred_maski is None) ^ (gt_masksi is None) - ), "`proto_out` and `gt_masksi` should be both None or both exist." - if pred_maski is None and gt_masksi is None: - return torch.zeros(0, self.niou, dtype=torch.bool) - - correct = torch.zeros( - predn.shape[0], - self.iouv.shape[0], - dtype=torch.bool, - device=self.iouv.device, - ) - - if not self.plots: - gt_masksi = F.interpolate( - gt_masksi.unsqueeze(0), - pred_maski.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - - iou = mask_iou( - gt_masksi.view(gt_masksi.shape[0], -1), - pred_maski.view(pred_maski.shape[0], -1), - ) - x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) - ) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(self.iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv - return correct - - def compute_stat(self, predn, pred_maski, labels, gt_maski): - """Compute states about ious. 
with boxs size in training img-size space.""" - nl = len(labels) - tcls = labels[:, 0].tolist() if nl else [] # target class - - if len(predn) == 0: - if nl: - self.stats.append( - ( - torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), - torch.Tensor(), - tcls, - ) - ) - return - - # Predictions - if self.single_cls: - predn[:, 5] = 0 - - # Evaluate - if nl: - tbox = xywh2xyxy(labels[:, 1:5]) # target boxes - labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels - # boxes - correct_boxes = self.process_batch(predn, labelsn, self.iouv) - - # masks - correct_masks = self.process_batch_masks( - predn, pred_maski, gt_maski, labelsn - ) - - if self.plots: - self.confusion_matrix.process_batch(predn, labelsn) - else: - correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append( - ( - correct_masks.cpu(), - correct_boxes.cpu(), - predn[:, 4].cpu(), - predn[:, 5].cpu(), - tcls, - ) - ) # (correct, conf, pcls, tcls) - - def print_metric(self, nt, stats): - # Print results - pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4) - print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results())) - - # Print results per class - # TODO: self.seen support verbose. - if self.verbose and self.nc > 1 and len(stats): - for i, c in enumerate(self.metric.ap_class_index): - print( - pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) - ) - - def plot_images(self, i, img, targets, masks, out, paths): - if not self.save_dir.exists(): - return - # plot ground truth - f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - - Thread( - target=plot_images_boxes_and_masks, - args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, - ).start() - f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions - - # plot predition - if len(self.pred_masks): - pred_masks = ( - torch.cat(self.pred_masks, dim=0) - if len(self.pred_masks) > 1 - else self.pred_masks[0] - ) - else: - pred_masks = None - Thread( - target=plot_images_boxes_and_masks, - args=( - img, - output_to_target(out), - pred_masks, - paths, - f, - self.names, - max(img.shape[2:]), - ), - daemon=True, - ).start() - - def nms(self, **kwargs): - return ( - non_max_suppression_masks(**kwargs) - if self.mask - else non_max_suppression(**kwargs) - ) - - def ap_per_class(self, *args): - return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) - - -class Metric: - def __init__(self) -> None: - self.p = [] # (nc, ) - self.r = [] # (nc, ) - self.f1 = [] # (nc, ) - self.all_ap = [] # (nc, 10) - self.ap_class_index = [] # (nc, ) - - @property - def ap50(self): - """AP@0.5 of all classes. - Return: - (nc, ) or []. - """ - return self.all_ap[:, 0] if len(self.all_ap) else [] - - @property - def ap(self): - """AP@0.5:0.95 - Return: - (nc, ) or []. - """ - return self.all_ap.mean(1) if len(self.all_ap) else [] - - @property - def mp(self): - """mean precision of all classes. - Return: - float. - """ - return self.p.mean() if len(self.p) else 0.0 - - @property - def mr(self): - """mean recall of all classes. - Return: - float. - """ - return self.r.mean() if len(self.r) else 0.0 - - @property - def map50(self): - """Mean AP@0.5 of all classes. - Return: - float. - """ - return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 - - @property - def map(self): - """Mean AP@0.5:0.95 of all classes. 
- Return: - float. - """ - return self.all_ap.mean() if len(self.all_ap) else 0.0 - - def mean_results(self): - """Mean of results, return mp, mr, map50, map""" - return (self.mp, self.mr, self.map50, self.map) - - def class_result(self, i): - """class-aware result, return p[i], r[i], ap50[i], ap[i]""" - return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) - - def get_maps(self, nc): - maps = np.zeros(nc) + self.map - for i, c in enumerate(self.ap_class_index): - maps[c] = self.ap[i] - return maps - - def update(self, results): - """ - Args: - results: tuple(p, r, ap, f1, ap_class) - """ - p, r, all_ap, f1, ap_class_index = results - self.p = p - self.r = r - self.all_ap = all_ap - self.f1 = f1 - self.ap_class_index = ap_class_index - - -class Metrics: - """Metric for boxes and masks.""" - - def __init__(self) -> None: - self.metric_box = Metric() - self.metric_mask = Metric() - - def update(self, results): - """ - Args: - results: Dict{'boxes': Dict{}, 'masks': Dict{}} - """ - self.metric_box.update(list(results["boxes"].values())) - self.metric_mask.update(list(results["masks"].values())) - - def mean_results(self): - return self.metric_box.mean_results() + self.metric_mask.mean_results() - - def class_result(self, i): - return self.metric_box.class_result(i) + self.metric_mask.class_result(i) - - def get_maps(self, nc): - return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) - - @property - def ap_class_index(self): - # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index diff --git a/eval_seg.py b/evaluator.py similarity index 100% rename from eval_seg.py rename to evaluator.py diff --git a/train_instseg.py b/train_instseg.py index b3c699c182e9..b141037d7e18 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -66,7 +66,7 @@ from torch.optim import AdamW import yaml from datetime import datetime -from evaluate import Yolov5Evaluator +from evaluator import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) From bf82d7452023111108cfeac935681867322628f2 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 19 Jul 2022 18:47:56 +0530 Subject: [PATCH 039/247] fix conflicts with laughing-q --- evaluator.py | 451 +++++++++++++++------------------------------------ 1 file changed, 128 insertions(+), 323 deletions(-) diff --git a/evaluator.py b/evaluator.py index 24f2e40b95db..3e5e3ded21f0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -1,3 +1,12 @@ +# TODO: Optimize plotting, losses & merge with val.py + +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 model accuracy on a custom dataset + +Usage: + $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 +""" import json from pathlib import Path @@ -6,33 +15,18 @@ import numpy as np import torch import torch.nn.functional as F +from PIL import Image # import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader -from utils.general import ( - coco80_to_coco91_class, - increment_path, - colorstr, check_dataset, check_img_size, check_suffix -) - -from utils.segment import ( - non_max_suppression_masks, - mask_iou, - process_mask, - process_mask_upsample, - scale_masks, -) -from utils.boxes import ( - box_iou, - non_max_suppression, - scale_coords, - xyxy2xywh, - xywh2xyxy, -) +from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) +from utils.general import 
(check_dataset, check_img_size, check_suffix, ) +from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) +from utils.plots import output_to_target, plot_images_boxes_and_masks from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.seg_plots import output_to_target, plot_images_boxes_and_masks +from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) from utils.torch_utils import select_device, time_sync @@ -40,9 +34,7 @@ def save_one_txt(predn, save_conf, shape, file): # Save one txt result gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = ( - (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() - ) # normalized xywh + xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(file, "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") @@ -56,20 +48,13 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): if pred_masks is not None: pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [ - mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in pred_masks - ] + rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { - "image_id": image_id, - "category_id": class_map[int(p[5])], - "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), - } + pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], + "score": round(p[4], 5), } if pred_masks is not None: pred_dict["segmentation"] = rles[i] jdict.append(pred_dict) @@ -77,25 +62,9 @@ def save_one_json(predn, jdict, path, class_map, pred_masks=None): @torch.no_grad() class Yolov5Evaluator: - def __init__( - self, - data, - conf_thres=0.001, - iou_thres=0.6, - device="", - single_cls=False, - augment=False, - verbose=False, - project="runs/val", - name="exp", - exist_ok=False, - half=True, - save_dir=Path(""), - nosave=False, - plots=True, - mask=False, - mask_downsample_ratio=1, - ) -> None: + def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, + project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, + max_plot_dets=10, mask=False, mask_downsample_ratio=1, overlap=False) -> None: self.data = check_dataset(data) # check self.conf_thres = conf_thres # confidence threshold self.iou_thres = iou_thres # NMS IoU threshold @@ -110,8 +79,10 @@ def __init__( self.save_dir = save_dir self.nosave = nosave self.plots = plots + self.max_plot_dets = max_plot_dets self.mask = mask self.mask_downsample_ratio = mask_downsample_ratio + self.overlap = overlap self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 @@ -119,40 +90,14 @@ def __init__( self.confusion_matrix = ConfusionMatrix(nc=self.nc) self.dt = [0.0, 0.0, 0.0] self.names = {k: v for k, v in enumerate(self.data["names"])} - self.s = ( - ("%20s" + "%11s" * 10) - % ( - "Class", - "Images", - "Labels", - "Box:{P", - "R", - "mAP@.5", - "mAP@.5:.95}", - "Mask:{P", - "R", - "mAP@.5", - 
"mAP@.5:.95}", - ) - if self.mask - else ("%20s" + "%11s" * 6) - % ( - "Class", - "Images", - "Labels", - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - ) - ) + self.s = (("%20s" + "%11s" * 10) % ( + "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", + "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( + "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) # coco stuff - self.is_coco = isinstance(self.data.get("val"), str) and self.data[ - "val" - ].endswith( - "coco/val2017.txt" - ) # COCO dataset + self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( + "coco/val2017.txt") # COCO dataset self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) self.jdict = [] self.iou_thres = 0.65 if self.is_coco else self.iou_thres @@ -166,6 +111,7 @@ def __init__( self.total_loss = torch.zeros((4 if self.mask else 3)) self.metric = Metrics() if self.mask else Metric() + @torch.no_grad() def run_training(self, model, dataloader, compute_loss=None): """This is for evaluation when training.""" self.seen = 0 @@ -179,15 +125,13 @@ def run_training(self, model, dataloader, compute_loss=None): # inference # masks will be `None` if training objection. - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): + for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)): # reset pred_masks self.pred_masks = [] img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks, compute_loss) # Statistics per image @@ -196,24 +140,22 @@ def run_training(self, model, dataloader, compute_loss=None): # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) + pred_maski = self.get_predmasks(pred, proto_out, + gt_masksi.shape[1:] if gt_masksi is not None else None, ) # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) - if self.plots and batch_i < 3: + if self.plots and batch_i < 2: self.plot_images(batch_i, img, targets, masks, out, paths) # compute map and print it. 
@@ -221,29 +163,12 @@ def run_training(self, model, dataloader, compute_loss=None): # Return results model.float() # for training - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) - - def run( - self, - weights, - batch_size, - imgsz, - save_txt=False, - save_conf=False, - save_json=False, - task="val", - ): + return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), + self.metric.get_maps(self.nc), t,) + + def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ): """This is for native evaluation.""" - model, dataloader, imgsz = self.before_infer( - weights, batch_size, imgsz, save_txt, task - ) + model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task) self.seen = 0 # self.iouv.to(self.device) self.half &= self.device.type != "cpu" # half precision only supported on CUDA @@ -252,15 +177,13 @@ def run( model.eval() # inference - for batch_i, (img, targets, paths, shapes, masks) in enumerate( - tqdm(dataloader, desc=self.s) - ): + for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)): # reset pred_masks self.pred_masks = [] img = img.to(self.device, non_blocking=True) targets = targets.to(self.device) if masks is not None: - masks = masks.to(self.device) + masks = masks.to(self.device).float() out, train_out = self.inference(model, img, targets, masks) # Statistics per image @@ -272,19 +195,17 @@ def run( # eval in every image level labels = targets[targets[:, 0] == si, 1:] - gt_masksi = masks[targets[:, 0] == si] if masks is not None else None + midx = [si] if self.overlap else targets[:, 0] == si + gt_masksi = masks[midx] if masks is not None else None # get predition masks proto_out = train_out[1][si] if isinstance(train_out, tuple) else None - pred_maski = self.get_predmasks( - pred, - proto_out, - gt_masksi.shape[1:] if gt_masksi is not None else None, - ) + pred_maski = self.get_predmasks(pred, proto_out, + gt_masksi.shape[1:] if gt_masksi is not None else None, ) # for visualization if self.plots and batch_i < 3 and pred_maski is not None: - self.pred_masks.append(pred_maski.cpu()) + self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu()) # NOTE: eval in training image-size space self.compute_stat(pred, pred_maski, labels, gt_masksi) @@ -297,36 +218,21 @@ def run( # clone() is for plot_images work correctly predn = pred.clone() # 因为test时添加了0.5的padding,因此这里与数据加载的padding不一致,所以需要转入ratio_pad - scale_coords( - img[si].shape[1:], predn[:, :4], shape, ratio_pad - ) # native-space pred + scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad) # native-space pred + # Save/log if save_txt and self.save_dir.exists(): # NOTE: convert coords to native space when save txt. # support save box preditions only - save_one_txt( - predn, - save_conf, - shape, - file=self.save_dir / "labels" / (path.stem + ".txt"), - ) + save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), ) if save_json and self.save_dir.exists(): # NOTE: convert coords to native space when save json. 
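                    # Reminder: scale_masks returns the masks as an (h, w, n) numpy
                    # array; save_one_json then RLE-encodes each mask with pycocotools
                    # and utf-8 decodes "counts" so the entry stays JSON-serializable.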
# if pred_maski is not None: # h, w, n - pred_maski = scale_masks( - img[si].shape[1:], - pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, - ratio_pad, - ) - save_one_json( - predn, - self.jdict, - path, - self.class_map, - pred_maski, - ) # append to COCO-JSON dictionary + pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(), + shape, ratio_pad, ) + save_one_json(predn, self.jdict, path, self.class_map, + pred_maski, ) # append to COCO-JSON dictionary if self.plots and batch_i < 3: self.plot_images(batch_i, img, targets, masks, out, paths) @@ -343,42 +249,24 @@ def run( # Print speeds shape = (batch_size, 3, imgsz, imgsz) - print( - f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" - % t - ) + print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t) s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" - if save_txt and self.save_dir.exists() - else "" - ) - print( - f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}" - ) + f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "") + print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}") # Return results - return ( - ( - *self.metric.mean_results(), - *(self.total_loss.cpu() / len(dataloader)).tolist(), - ), - self.metric.get_maps(self.nc), - t, - ) + return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), + self.metric.get_maps(self.nc), t,) def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): "prepare for evaluation without training." 
self.device = select_device(self.device, batch_size=batch_size) # Directories - self.save_dir = increment_path( - Path(self.project) / self.name, exist_ok=self.exist_ok - ) # increment run + self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir( - parents=True, exist_ok=True - ) # make dir + (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model check_suffix(weights, ".pt") @@ -388,27 +276,11 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Data if self.device.type != "cpu": - model( - torch.zeros(1, 3, imgsz, imgsz) - .to(self.device) - .type_as(next(model.parameters())) - ) # run once + model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once pad = 0.0 if task == "speed" else 0.5 - task = ( - task if task in ("train", "val", "test") else "val" - ) # path to train/val/test images - dataloader = create_dataloader( - self.data[task], - imgsz, - batch_size, - gs, - self.single_cls, - pad=pad, - rect=True, - prefix=colorstr(f"{task}: "), - mask_head=self.mask, - mask_downsample_ratio=self.mask_downsample_ratio, - )[0] + task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images + dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, + prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] return model, dataloader, imgsz def inference(self, model, img, targets, masks=None, compute_loss=None): @@ -421,29 +293,18 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): self.dt[0] += t2 - t1 # Run model - out, train_out = model( - img, augment=self.augment - ) # inference and training outputs + out, train_out = model(img, augment=self.augment) # inference and training outputs self.dt[1] += time_sync() - t2 # Compute loss if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[ - 1 - ] # box, obj, cls + self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to( - self.device - ) # to pixels + targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels t3 = time_sync() - out = self.nms( - prediction=out, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - multi_label=True, - agnostic=self.single_cls, - ) + out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, + agnostic=self.single_cls, ) self.dt[2] += time_sync() - t3 return out, train_out @@ -454,25 +315,18 @@ def after_infer(self): """ # Plot confusion matrix if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot( - save_dir=self.save_dir, names=list(self.names.values()) - ) + self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy box_or_mask_any = stats[0].any() or stats[1].any() stats = stats[1:] if not self.mask else stats if len(stats) and box_or_mask_any: - results = self.ap_per_class( - *stats, - self.plots, - self.save_dir if self.save_dir.exists() else None, - self.names, - ) + results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, + 
self.names, ) self.metric.update(results) - nt = np.bincount( - stats[(3 if not self.mask else 4)].astype(np.int64), minlength=self.nc - ) # number of targets per class + nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), + minlength=self.nc) # number of targets per class else: nt = torch.zeros(1) @@ -492,19 +346,13 @@ def process_batch(self, detections, labels, iouv): Returns: correct (Array[N, 10]), for 10 IoU levels """ - correct = torch.zeros( - detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device - ) + correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) iou = box_iou(labels[:, 1:], detections[:, :4]) x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]) - ) # IoU above threshold and classes match + (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -532,53 +380,36 @@ def get_predmasks(self, pred, proto_out, gt_shape): if proto_out is None or len(pred) == 0: return None process = process_mask_upsample if self.plots else process_mask - gt_shape = ( - gt_shape[0] * self.mask_downsample_ratio, - gt_shape[1] * self.mask_downsample_ratio, - ) + gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) # n, h, w - pred_mask = ( - process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape) - .permute(2, 0, 1) - .contiguous() - ) + pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) return pred_mask def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ( - (pred_maski is None) ^ (gt_masksi is None) - ), "`proto_out` and `gt_masksi` should be both None or both exist." + assert not ((pred_maski is None) ^ ( + gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." 
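+        # when self.overlap is set, gt_masksi arrives index-encoded as (1, h, w);
+        # the block below expands it to per-instance binary masks (see the sketch
+        # after the run_training hunk above)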
if pred_maski is None and gt_masksi is None: return torch.zeros(0, self.niou, dtype=torch.bool) - correct = torch.zeros( - predn.shape[0], - self.iouv.shape[0], - dtype=torch.bool, - device=self.iouv.device, - ) - - if not self.plots: - gt_masksi = F.interpolate( - gt_masksi.unsqueeze(0), - pred_maski.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) + correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, ) + + # convert masks (1, 640, 640) -> (n, 640, 640) + if self.overlap: + nl = len(labels) + index = torch.arange(nl, device=gt_masksi.device).view(nl, 1, 1) + 1 + gt_masksi = gt_masksi.repeat(nl, 1, 1) + gt_masksi = torch.where(gt_masksi == index, 1.0, 0.0) + + if gt_masksi.shape[1:] != pred_maski.shape[1:]: + gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear", + align_corners=False, ).squeeze(0) - iou = mask_iou( - gt_masksi.view(gt_masksi.shape[0], -1), - pred_maski.view(pred_maski.shape[0], -1), - ) + iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), ) x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5]) - ) # IoU above threshold and classes match + (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) - .cpu() - .numpy() - ) # [label, detection, iou] + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -595,15 +426,9 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): if len(predn) == 0: if nl: - self.stats.append( - ( - torch.zeros(0, self.niou, dtype=torch.bool), # boxes - torch.zeros(0, self.niou, dtype=torch.bool), # masks - torch.Tensor(), - torch.Tensor(), - tcls, - ) - ) + self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # boxes + torch.zeros(0, self.niou, dtype=torch.bool), # masks + torch.Tensor(), torch.Tensor(), tcls,)) return # Predictions @@ -618,24 +443,15 @@ def compute_stat(self, predn, pred_maski, labels, gt_maski): correct_boxes = self.process_batch(predn, labelsn, self.iouv) # masks - correct_masks = self.process_batch_masks( - predn, pred_maski, gt_maski, labelsn - ) + correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn) if self.plots: self.confusion_matrix.process_batch(predn, labelsn) else: correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool) - self.stats.append( - ( - correct_masks.cpu(), - correct_boxes.cpu(), - predn[:, 4].cpu(), - predn[:, 5].cpu(), - tcls, - ) - ) # (correct, conf, pcls, tcls) + self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(), + tcls,)) # (correct, conf, pcls, tcls) def print_metric(self, nt, stats): # Print results @@ -646,52 +462,41 @@ def print_metric(self, nt, stats): # TODO: self.seen support verbose. 
if self.verbose and self.nc > 1 and len(stats): for i, c in enumerate(self.metric.ap_class_index): - print( - pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i)) - ) + print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) def plot_images(self, i, img, targets, masks, out, paths): if not self.save_dir.exists(): return # plot ground truth f = self.save_dir / f"val_batch{i}_labels.jpg" # labels + + if masks is not None and masks.shape[1:] != img.shape[2:]: + masks = F.interpolate( + masks.unsqueeze(0).float(), + img.shape[2:], + mode="bilinear", + align_corners=False, + ).squeeze(0) - Thread( - target=plot_images_boxes_and_masks, - args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, - ).start() + Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), + daemon=True, ).start() f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions # plot predition if len(self.pred_masks): - pred_masks = ( - torch.cat(self.pred_masks, dim=0) - if len(self.pred_masks) > 1 - else self.pred_masks[0] - ) + pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) else: pred_masks = None - Thread( - target=plot_images_boxes_and_masks, - args=( - img, - output_to_target(out), - pred_masks, - paths, - f, - self.names, - max(img.shape[2:]), - ), - daemon=True, - ).start() + plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) + #Thread(target=plot_images_boxes_and_masks, + # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), + # daemon=True, ).start() + import wandb + if wandb.run: + wandb.log({f"pred_{i}": wandb.Image(str(f))}) def nms(self, **kwargs): - return ( - non_max_suppression_masks(**kwargs) - if self.mask - else non_max_suppression(**kwargs) - ) + return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) def ap_per_class(self, *args): return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) @@ -807,4 +612,4 @@ def get_maps(self, nc): @property def ap_class_index(self): # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index + return self.metric_box.ap_class_index \ No newline at end of file From 6ad389bb6fa6afe164f6c8e6156c8a37b0556611 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 20 Jul 2022 18:00:11 +0800 Subject: [PATCH 040/247] add val_instseg.py&&remove useless code --- evaluator.py | 4 +- seg_dataloaders.py | 199 ++----------- train_instseg.py | 2 +- utils/boxes.py | 298 -------------------- utils/seg_plots.py | 689 --------------------------------------------- val_instseg.py | 85 ++++++ 6 files changed, 105 insertions(+), 1172 deletions(-) delete mode 100644 utils/boxes.py delete mode 100644 utils/seg_plots.py create mode 100644 val_instseg.py diff --git a/evaluator.py b/evaluator.py index 3e5e3ded21f0..27533c3048f1 100644 --- a/evaluator.py +++ b/evaluator.py @@ -270,7 +270,7 @@ def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): # Load model check_suffix(weights, ".pt") - model = attempt_load(weights, map_location=self.device) # load FP32 model + model = attempt_load(weights, device=self.device) # load FP32 model gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(imgsz, s=gs) # check image size @@ 
-612,4 +612,4 @@ def get_maps(self, nc): @property def ap_class_index(self): # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index \ No newline at end of file + return self.metric_box.ap_class_index diff --git a/seg_dataloaders.py b/seg_dataloaders.py index ac6da36fab09..4d74bb00c1a9 100644 --- a/seg_dataloaders.py +++ b/seg_dataloaders.py @@ -8,11 +8,14 @@ import json import logging import time +import numpy as np from functools import wraps from itertools import repeat from multiprocessing.pool import ThreadPool, Pool from pathlib import Path from zipfile import ZipFile +from PIL import Image +from tqdm import tqdm import torch.nn.functional as F import yaml @@ -20,8 +23,6 @@ from torch.utils.data import distributed from torch.utils.data.sampler import BatchSampler as torchBatchSampler from torch.utils.data.sampler import RandomSampler -from torch.utils.data.sampler import Sampler -from tqdm import tqdm from seg_augmentations import (Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, ) from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy @@ -60,8 +61,8 @@ def __iter__(self): def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, - rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, neg_dir="", - bg_dir="", area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): + rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, + area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): if rect and shuffle: print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") shuffle = False @@ -72,7 +73,7 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non hyp=hyp, # augmentation hyperparameters rect=rect, # rectangular training cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, - prefix=prefix, neg_dir=neg_dir, bg_dir=bg_dir, area_thr=area_thr, ) + prefix=prefix, area_thr=area_thr, ) if mask_head: dataset.downsample_ratio = mask_downsample_ratio dataset.overlap = overlap_mask @@ -88,10 +89,6 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non # batch-size and batch-sampler is exclusion batch_sampler=batch_sampler, pin_memory=True, collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, - # Make sure each process has different random seed, especially for 'fork' method. - # Check https://github.com/pytorch/pytorch/issues/63311 for more details. - # but this will make init_seed() not work. 
- # worker_init_fn=worker_init_reset_seed, ) return dataloader, dataset @@ -141,7 +138,7 @@ class LoadImagesAndLabels(Dataset): cache_version = 0.6 # dataset labels *.cache version def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, ): + cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", area_thr=0.2, ): super().__init__(augment=augment) self.img_size = img_size self.hyp = hyp @@ -154,7 +151,6 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.albumentations = Albumentations() if augment else None # additional feature - self.img_neg_files, self.img_bg_files = self.get_neg_and_bg(neg_dir, bg_dir) self.area_thr = area_thr p = Path(path) # os-agnostic @@ -235,20 +231,6 @@ def get_img_files(self, p, prefix): raise Exception(f"{prefix}Error loading data from {str(p)}: {e}\nSee {HELP_URL}") return img_files - def get_neg_and_bg(self, neg_dir, bg_dir): - """Get negative pictures and background pictures.""" - img_neg_files, img_bg_files = [], [] - if os.path.isdir(neg_dir): - img_neg_files = [os.path.join(neg_dir, i) for i in os.listdir(neg_dir)] - logging.info(colorstr( - "Negative dir: ") + f"'{neg_dir}', using {len(img_neg_files)} pictures from the dir as negative samples during training") - - if os.path.isdir(bg_dir): - img_bg_files = [os.path.join(bg_dir, i) for i in os.listdir(bg_dir)] - logging.info(colorstr( - "Background dir: ") + f"{bg_dir}, using {len(img_bg_files)} pictures from the dir as background during training") - return img_neg_files, img_bg_files - def load_cache(self, cache_path, prefix): """Load labels from *.cache file.""" try: @@ -454,11 +436,11 @@ def collate_fn4(batch): class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0, prefix="", neg_dir="", bg_dir="", area_thr=0.2, + cache_images=False, single_cls=False, stride=32, pad=0, prefix="", area_thr=0.2, downsample_ratio=1, overlap=False, ): super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, - stride, pad, prefix, neg_dir, bg_dir, area_thr, ) + stride, pad, prefix, area_thr, ) self.downsample_ratio = downsample_ratio self.overlap = overlap @@ -590,66 +572,23 @@ def load_image(self, i): return (self.imgs[i], self.img_hw0[i], self.img_hw[i],) # im, hw_original, hw_resized -def load_neg_image(self, index): - path = self.img_neg_files[index] - img = cv2.imread(path) # BGR - assert img is not None, "Image Not Found " + path - h0, w0 = img.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # resize image to img_size - if r != 1: # always resize down, only resize up if training with augmentation - interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) - return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized - - -def load_bg_image(self, index): - path = self.img_files[index] - bg_path = self.img_bg_files[np.random.randint(0, len(self.img_bg_files))] - img, coord, _, (w, h) = paste1(path, bg_path, bg_size=self.img_size, fg_scale=random.uniform(1.5, 5)) - label = self.labels[index] - label[:, 1] = (label[:, 1] * w + coord[0]) / img.shape[1] - label[:, 2] = 
(label[:, 2] * h + coord[1]) / img.shape[0] - label[:, 3] = label[:, 3] * w / img.shape[1] - label[:, 4] = label[:, 4] * h / img.shape[0] - - assert img is not None, "Image Not Found " + path - h0, w0 = img.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # resize image to img_size - if r != 1: # always resize down, only resize up if training with augmentation - interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) - return img, (h0, w0), img.shape[:2], label # img, hw_original, hw_resized - - def load_mosaic(self, index, return_seg=False): # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - num_neg = random.randint(0, 2) if len(self.img_neg_files) else 0 # 3 additional image indices - indices = [index] + random.choices(self.indices, k=(3 - num_neg)) - indices = indices + random.choices(range(len(self.img_neg_files)), k=num_neg) - ri = list(range(4)) - random.shuffle(ri) - for j, (i, index) in enumerate(zip(ri, indices)): - temp_label = None + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + for i, index in enumerate(indices): # Load image - # TODO - if j < (4 - num_neg): - if len(self.img_bg_files) and (random.uniform(0, 1) > 0.5): - img, _, (h, w), temp_label = load_bg_image(self, index) - else: - img, _, (h, w) = load_image(self, index) - else: - img, _, (h, w) = load_neg_image(self, index) + img, _, (h, w) = load_image(self, index) + # place img in img4 - if j == 0: - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles if i == 0: # top left - x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,) # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,) # xmin, ymin, xmax, ymax (small image) + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -664,15 +603,7 @@ def load_mosaic(self, index, return_seg=False): padw = x1a - x1b padh = y1a - y1b - # Labels - if j >= (4 - num_neg): - continue - - # TODO: deal with segments - if len(self.img_bg_files) and temp_label is not None: - labels, segments = temp_label, [] - else: - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format @@ -873,7 +804,6 @@ def hub_ops(f, max_dim=1920): import glob import shutil import hashlib -import uuid import torch import cv2 import random @@ -936,14 +866,6 @@ def exif_transpose(image): image.info["exif"] = exif.tobytes() return image - -def worker_init_reset_seed(worker_id): - seed = uuid.uuid4().int % 2 ** 32 - random.seed(seed) - torch.set_rng_state(torch.manual_seed(seed).get_state()) - np.random.seed(seed) - - def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): """ Args: @@ -1170,90 +1092,3 @@ def 
__len__(self): def __iter__(self): for i in range(len(self)): yield next(self.iterator) - - -# REFACTOR IN A NEW FILE -from PIL import Image -import numpy as np -from PIL import ImageFile - -# import numbers - -ImageFile.LOAD_TRUNCATED_IMAGES = True - - -def get_raito(new_size, original_size): - """Get the ratio bewtten input_size and original_size""" - # # mmdet way - # iw, ih = new_size - # ow, oh = original_size - # max_long_edge = max(iw, ih) - # max_short_edge = min(iw, ih) - # ratio = min(max_long_edge / max(ow, oh), max_short_edge / min(ow, oh)) - # return ratio - - # # yolov5 way - return min(new_size[0] / original_size[0], new_size[1] / original_size[1]) - - -def imresize(img, new_size): - """Resize the img with new_size by PIL(keep aspect). - - Args: - img (PIL): The original image. - new_size (tuple): The new size(w, h). - """ - if isinstance(new_size, int): - new_size = (new_size, new_size) - old_size = img.size - ratio = get_raito(new_size, old_size) - img = img.resize((int(old_size[0] * ratio), int(old_size[1] * ratio))) - return img - - -def get_wh(a, b): - return np.random.randint(a, b) - - -def paste2(sample1, sample2, background, scale=1.2): - sample1 = Image.open(sample1) - d_w1, d_h1 = sample1.size - - sample2 = Image.open(sample2) - d_w2, d_h2 = sample2.size - - # print(sample.size) - background = Image.open(background) - background = background.resize((int((d_w1 + d_w2) * scale), int((d_h1 + d_h2) * scale))) - bw, bh = background.size - - x1, y1 = get_wh(0, int(d_w1 * scale) - d_w1), get_wh(0, bh - d_h1) - x2, y2 = get_wh(int(d_w1 * scale), bw - d_w2), get_wh(0, bh - d_h2) - # x1, y1 = get_wh(0, int(bw / 2) - d_w1), get_wh(0, bh - d_h1) - # x2, y2 = get_wh(int(bw / 2), bw - d_w2), get_wh(0, bh - d_h2) - - background.paste(sample1, (x1, y1)) - background.paste(sample2, (x2, y2)) - # background = background.resize((416, 416)) - - return np.array(background), (x1, y1, x2, y2), background # print(background.size) # background.show() - - -def paste1(sample, background, bg_size, fg_scale=1.5): - sample = Image.open(sample) - background = Image.open(background) - background = imresize(background, bg_size) - bw, bh = background.size - # background = background.resize((int(d_w * scale), int(d_h * scale))) - new_w, new_h = int(bw / fg_scale), int(bh / fg_scale) - sample = imresize(sample, (new_w, new_h)) - - d_w, d_h = sample.size - x1, y1 = get_wh(0, bw - d_w), get_wh(0, bh - d_h) - background.paste(sample, (x1, y1)) - # draw = ImageDraw.Draw(background) - # draw.rectangle((x1 + 240, y1 + 254, x1 + 240 + 5, y1 + 254 + 5), 'red', 'green') - # draw.rectangle((x1 + 80, y1 + 28, x1 + 400, y1 + 480), None, 'green') - # background = background.resize((416, 416)) - - return np.array(background.convert('RGB'))[:, :, ::-1], (x1, y1), background, (d_w, d_h) diff --git a/train_instseg.py b/train_instseg.py index eaffd189e574..b1ea72ff5757 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -54,7 +54,7 @@ from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.seg_loss import ComputeLoss #from utils.metrics import fitness -from utils.seg_plots import plot_evolve, plot_labels +from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first diff --git a/utils/boxes.py b/utils/boxes.py deleted file mode 100644 index 1881dde83c81..000000000000 --- a/utils/boxes.py +++ /dev/null @@ -1,298 +0,0 @@ -import time - -import cv2 -import numpy as np -import torch -import torchvision 
-
-from utils.general import clip_coords, scale_coords, xywh2xyxy, xyxy2xywh
-from .general import increment_path
-from .metrics import box_iou
-
-
-def nms_numpy(boxes, scores, class_id, threshold, method=None, agnostic=False):
-    """
-    :param boxes: numpy(N, 4), xyxy
-    :param scores: numpy(N, )
-    :param class_id: numpy(N, )
-    :param threshold: float
-    :param method:
-    :return: indices of kept boxes
-    """
-    if boxes.size == 0:
-        return np.empty((0,), dtype=np.int8)
-    max_wh = 4096
-    if isinstance(boxes, torch.Tensor):
-        boxes = boxes.cpu().numpy()
-    if isinstance(scores, torch.Tensor):
-        scores = scores.cpu().numpy()
-    if isinstance(class_id, torch.Tensor):
-        class_id = class_id.cpu().numpy()
-
-    if boxes.ndim == 1:
-        boxes = boxes[None, :]
-    assert boxes.shape[1] == 4, f"expected boxes shape [N, 4], but got {boxes.shape}"
-    if len(class_id.shape) == 1:
-        class_id = class_id[:, None]
-
-    assert (boxes.shape[0] == class_id.shape[0] == scores.shape[0]), f"boxes, class_id and scores shapes must be equal"
-
-    c = class_id * (0 if agnostic else max_wh)
-    boxes = boxes + c
-    x1 = boxes[:, 0].copy()
-    y1 = boxes[:, 1].copy()
-    x2 = boxes[:, 2].copy()
-    y2 = boxes[:, 3].copy()
-
-    s = scores
-    area = (x2 - x1 + 1) * (y2 - y1 + 1)
-
-    I = np.argsort(s)  # indices that sort scores ascending (highest score taken from the end)
-    pick = np.zeros_like(s, dtype=np.int16)
-    counter = 0
-    while I.size > 0:
-        i = I[-1]
-        pick[counter] = i
-        counter += 1
-        idx = I[0:-1]
-
-        xx1 = np.maximum(x1[i], x1[idx]).copy()
-        yy1 = np.maximum(y1[i], y1[idx]).copy()
-        xx2 = np.minimum(x2[i], x2[idx]).copy()
-        yy2 = np.minimum(y2[i], y2[idx]).copy()
-
-        w = np.maximum(0.0, xx2 - xx1 + 1).copy()
-        h = np.maximum(0.0, yy2 - yy1 + 1).copy()
-
-        inter = w * h
-        if method == "Min":
-            o = inter / np.minimum(area[i], area[idx])
-        else:
-            o = inter / (area[i] + area[idx] - inter)
-        I = I[np.where(o <= threshold)]
-
-    pick = pick[:counter].copy()
-    return pick
-
-
-def save_one_box(xyxy, im, file="image.jpg", gain=1.02, pad=10, square=False, BGR=False, save=True):
-    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels.
Save and/or return crop - xyxy = torch.tensor(xyxy).view(-1, 4) - b = xyxy2xywh(xyxy) # boxes - if square: - b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square - b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad - xyxy = xywh2xyxy(b).long() - clip_coords(xyxy, im.shape) - crop = im[int(xyxy[0, 1]): int(xyxy[0, 3]), int(xyxy[0, 0]): int(xyxy[0, 2]), :: (1 if BGR else -1), ] - if save: - cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix(".jpg")), crop) - return crop - - -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, - labels=(), max_det=300, ): - """Runs Non-Maximum Suppression (NMS) on inference results - - Returns: - list of detections, on (n,6) tensor per image [xyxy, conf, cls] - """ - - nc = prediction.shape[2] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Checks - assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" - - # Settings - min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 10.0 # seconds to quit after - redundant = True # require redundant detections - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - merge = False # use merge-NMS - - t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - l = labels[xi] - v = torch.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box - v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls - x = torch.cat((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) - else: # best class only - conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Apply finite constraint - # if not torch.isfinite(x).all(): - # x = x[torch.isfinite(x).all(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores - i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean) - # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) - iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix - weights = iou * scores[None] 
# box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes - if redundant: - i = i[iou.sum(1) > 1] # require redundancy - - output[xi] = x[i] - if (time.time() - t) > time_limit: - print(f"WARNING: NMS time limit {time_limit}s exceeded") - break # time limit exceeded - - return output - - -def non_max_suppression_numpy(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, - multi_label=False, labels=(), max_det=300, ): - """Runs Non-Maximum Suppression (NMS) on inference results - - Returns: - list of detections, on (n,6) tensor per image [xyxy, conf, cls] - """ - - nc = prediction.shape[2] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Checks - assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" - - # Settings - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 10.0 # seconds to quit after - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - - t = time.time() - output = [np.zeros((0, 6))] * prediction.shape[0] - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - l = labels[xi] - v = np.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box - v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls - x = np.concatenate((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].float()), 1) - else: # best class only - conf, j = x[:, 5:].max(1), x[:, 5:].argmax(1) - x = np.concatenate((box, conf[:, None], j.astype(np.float)[:, None]), 1)[conf > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == np.array(classes)).any(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - - # Batched NMS - boxes, scores, cls = x[:, :4], x[:, 4], x[:, 5] - i = nms_numpy(boxes, scores, cls, iou_thres, agnostic) # NMS - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - - output[xi] = x[i][None, :] if x[i].ndim == 1 else x[i] - if (time.time() - t) > time_limit: - print(f"WARNING: NMS time limit {time_limit}s exceeded") - break # time limit exceeded - - return output - - -def apply_classifier(x, model, img, im0): - # Apply a second stage classifier to yolo outputs - im0 = [im0] if isinstance(im0, np.ndarray) else im0 - for i, d in enumerate(x): # per image - if d is not None and len(d): - d = d.clone() - - # Reshape and pad cutouts - b = xyxy2xywh(d[:, :4]) # boxes - b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square - b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad - d[:, :4] = xywh2xyxy(b).long() - - # Rescale boxes from img_size to im0 size - scale_coords(img.shape[2:], d[:, :4], 
im0[i].shape) - - # Classes - pred_cls1 = d[:, 5].long() - ims = [] - for j, a in enumerate(d): # per item - cutout = im0[i][int(a[1]): int(a[3]), int(a[0]): int(a[2])] - im = cv2.resize(cutout, (224, 224)) # BGR - # cv2.imwrite('example%i.jpg' % j, cutout) - - im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 - im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 - im /= 255.0 # 0 - 255 to 0.0 - 1.0 - ims.append(im) - - pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction - x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections - - return x diff --git a/utils/seg_plots.py b/utils/seg_plots.py deleted file mode 100644 index 3f09d2ad272c..000000000000 --- a/utils/seg_plots.py +++ /dev/null @@ -1,689 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Plotting utils -""" - -import math -import os -from copy import copy -from itertools import repeat -from pathlib import Path - -import cv2 -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sn -import torch -from PIL import Image, ImageDraw - -from utils.general import check_font, is_ascii, is_chinese -from utils.seg_metrics import fitness -from .boxes import xywh2xyxy, xyxy2xywh - -# Settings -RANK = int(os.getenv("RANK", -1)) -matplotlib.rc("font", **{"size": 11}) -matplotlib.use("Agg") # for writing to files only - - -class Colors: - # Ultralytics color palette https://ultralytics.com/ - def __init__(self): - # hex = matplotlib.colors.TABLEAU_COLORS.values() - hex = ("FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB", - "2C99A8", "00C2FF", "344593", "6473FF", "0018EC", "8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",) - self.palette = [self.hex2rgb("#" + c) for c in hex] - self.n = len(self.palette) - - def __call__(self, i, bgr=False): - c = self.palette[int(i) % self.n] - return (c[2], c[1], c[0]) if bgr else c - - @staticmethod - def hex2rgb(h): # rgb order (PIL) - return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4)) - - -colors = Colors() # create instance for 'from utils.plots import colors' - - -class Annotator: - if RANK in (-1, 0): - check_font() # download TTF if necessary - - # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations - def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=False, example="abc", ): - assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." 
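# The contiguity assert above matters because views created by slicing or
# transposing share storage without being contiguous; a quick illustration:
import numpy as np

im = np.zeros((480, 640, 3), dtype=np.uint8).transpose(1, 0, 2)  # transposed view, non-contiguous
assert not im.data.contiguous
im = np.ascontiguousarray(im)  # copy into contiguous memory
assert im.data.contiguous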
- self.pil = pil or not is_ascii(example) or is_chinese(example) - if self.pil: # use PIL - self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) - self.draw = ImageDraw.Draw(self.im) - self.font = check_font(font="Arial.Unicode.ttf" if is_chinese(example) else font, - size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12), ) - else: # use cv2 - self.im = im - self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width - - def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): - # Add one xyxy box to image with label - if self.pil or not is_ascii(label): - self.draw.rectangle(box, width=self.lw, outline=color) # box - if label: - w, h = self.font.getsize(label) # text width, height - outside = box[1] - h >= 0 # label fits outside box - self.draw.rectangle([box[0], box[1] - h if outside else box[1], box[0] + w + 1, - box[1] + 1 if outside else box[1] + h + 1, ], fill=color, ) - # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 - self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font, ) - else: # cv2 - p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) - cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) - if label: - tf = max(self.lw - 1, 1) # font thickness - w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height - outside = p1[1] - h - 3 >= 0 # label fits outside box - p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 - cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled - cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, - thickness=tf, lineType=cv2.LINE_AA, ) - - def rectangle(self, xy, fill=None, outline=None, width=1): - # Add rectangle to image (PIL-only) - self.draw.rectangle(xy, fill, outline, width) - - def text(self, xy, text, txt_color=(255, 255, 255)): - # Add text to image (PIL-only) - w, h = self.font.getsize(text) # text width, height - self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) - - def result(self): - # Return annotated image as array - return np.asarray(self.im) - - -class Visualizer(object): - """Visualization of one model.""" - - def __init__(self, names) -> None: - super().__init__() - self.names = names - - def draw_one_img(self, img, output, vis_conf=0.4): - """Visualize one images. - - Args: - imgs (numpy.ndarray): one image. - outputs (torch.Tensor): one output, (num_boxes, classes+5) - vis_confs (float, optional): Visualize threshold. - Return: - img (numpy.ndarray): Image after visualization. - """ - if isinstance(output, list): - output = output[0] - if output is None or len(output) == 0: - return img - for (*xyxy, conf, cls) in reversed(output[:, :6]): - if conf < vis_conf: - continue - label = '%s %.2f' % (self.names[int(cls)], conf) - color = colors(int(cls)) - plot_one_box(xyxy, img, label=label, color=color, line_thickness=2) - return img - - def draw_multi_img(self, imgs, outputs, vis_confs=0.4): - """Visualize multi images. - - Args: - imgs (List[numpy.array]): multi images. - outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. - vis_confs (float | tuple[float], optional): Visualize threshold. - Return: - imgs (List[numpy.ndarray]): Images after visualization. 
- """ - if isinstance(vis_confs, float): - vis_confs = list(repeat(vis_confs, len(imgs))) - assert len(imgs) == len(outputs) == len(vis_confs) - for i, output in enumerate(outputs): # detections per image - self.draw_one_img(imgs[i], output, vis_confs[i]) - return imgs - - def draw_imgs(self, imgs, outputs, vis_confs=0.4): - if isinstance(imgs, np.ndarray): - return self.draw_one_img(imgs, outputs, vis_confs) - else: - return self.draw_multi_img(imgs, outputs, vis_confs) - - def __call__(self, imgs, outputs, vis_confs=0.4): - return self.draw_imgs(imgs, outputs, vis_confs) - - -def hist2d(x, y, n=100): - # 2d histogram used in labels.png and evolve.png - xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) - hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) - xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) - yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) - return np.log(hist[xidx, yidx]) - - -def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): - from scipy.signal import butter, filtfilt - - # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy - def butter_lowpass(cutoff, fs, order): - nyq = 0.5 * fs - normal_cutoff = cutoff / nyq - return butter(order, normal_cutoff, btype="low", analog=False) - - b, a = butter_lowpass(cutoff, fs, order=order) - return filtfilt(b, a, data) # forward-backward filter - - -def output_to_target(output): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] - targets = [] - for i, o in enumerate(output): - for *box, conf, cls in o.cpu().numpy()[:, :6]: - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) - return np.array(targets) - - -def plot_images(images, targets, paths=None, fname="images.jpg", names=None, max_size=1920, max_subplots=16, ): - # Plot image grid with labels - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if np.max(images[0]) <= 1: - images *= 255.0 # de-normalise (optional) - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Build Image - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, im in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - im = im.transpose(1, 2, 0) - mosaic[y: y + h, x: x + w, :] = im - - # Resize (optional) - scale = max_size / ns / max(h, w) - if scale < 1: - h = math.ceil(scale * h) - w = math.ceil(scale * w) - mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) - - # Annotate - fs = int((h + w) * ns * 0.01) # font size - annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) - for i in range(i + 1): - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders - if paths: - annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220), ) # filenames - if len(targets) > 0: - ti = targets[targets[:, 0] == i] # image targets - boxes = xywh2xyxy(ti[:, 2:6]).T - classes = ti[:, 1].astype("int") - labels = ti.shape[1] == 6 # labels if no conf column - conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) - 
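# Convention assumed throughout these plot helpers: a target row is
# [image_index, class, x, y, w, h] for ground truth, with a trailing confidence
# column added for predictions, so the column count tells the two apart:
import numpy as np

ti = np.array([[0, 1, 0.5, 0.5, 0.2, 0.3]])  # one normalized-xywh label for image 0
labels = ti.shape[1] == 6                    # True -> ground truth (no conf column)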
- if boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale < 1: # absolute coords need scale if image scales - boxes *= scale - boxes[[0, 2]] += x - boxes[[1, 3]] += y - for j, box in enumerate(boxes.T.tolist()): - cls = classes[j] - color = colors(cls) - cls = names[cls] if names else cls - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" - annotator.box_label(box, label, color=color) - annotator.im.save(fname) # save - return annotator.result() - - -def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): - # Plot LR simulating training for full epochs - optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals - y = [] - for _ in range(epochs): - scheduler.step() - y.append(optimizer.param_groups[0]["lr"]) - plt.plot(y, ".-", label="LR") - plt.xlabel("epoch") - plt.ylabel("LR") - plt.grid() - plt.xlim(0, epochs) - plt.ylim(0) - plt.savefig(Path(save_dir) / "LR.png", dpi=200) - plt.close() - - -def plot_val_txt(): # from utils.plots import *; plot_val() - # Plot val.txt histograms - x = np.loadtxt("val.txt", dtype=np.float32) - box = xyxy2xywh(x[:, :4]) - cx, cy = box[:, 0], box[:, 1] - - fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) - ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) - ax.set_aspect("equal") - plt.savefig("hist2d.png", dpi=300) - - fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) - ax[0].hist(cx, bins=600) - ax[1].hist(cy, bins=600) - plt.savefig("hist1d.png", dpi=200) - - -def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() - # Plot targets.txt histograms - x = np.loadtxt("targets.txt", dtype=np.float32).T - s = ["x targets", "y targets", "width targets", "height targets"] - fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) - ax = ax.ravel() - for i in range(4): - ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % (x[i].mean(), x[i].std())) - ax[i].legend() - ax[i].set_title(s[i]) - plt.savefig("targets.jpg", dpi=200) - - -def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() - # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) - save_dir = Path(file).parent if file else Path(dir) - plot2 = False # plot additional results - if plot2: - ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() - - fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) - # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: - for f in sorted(save_dir.glob("study*.txt")): - y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T - x = np.arange(y.shape[1]) if x is None else np.array(x) - if plot2: - s = ["P", "R", "mAP@.5", "mAP@.5:.95", "t_preprocess (ms/img)", "t_inference (ms/img)", "t_NMS (ms/img)", ] - for i in range(7): - ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) - ax[i].set_title(s[i]) - - j = y[3].argmax() + 1 - ax2.plot(y[5, 1:j], y[3, 1:j] * 1e2, ".-", linewidth=2, markersize=8, - label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), ) - - ax2.plot(1e3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], "k.-", linewidth=2, - markersize=8, alpha=0.25, label="EfficientDet", ) - - ax2.grid(alpha=0.2) - ax2.set_yticks(np.arange(20, 60, 5)) - ax2.set_xlim(0, 57) - ax2.set_ylim(25, 55) - ax2.set_xlabel("GPU Speed (ms/img)") 
- ax2.set_ylabel("COCO AP val") - ax2.legend(loc="lower right") - f = save_dir / "study.png" - print(f"Saving {f}...") - plt.savefig(f, dpi=300) - - -def plot_labels(labels, names=(), save_dir=Path("")): - # plot dataset labels - print("Plotting labels... ") - c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes - nc = int(c.max() + 1) # number of classes - x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) - - # seaborn correlogram - sn.pairplot(x, corner=True, diag_kind="auto", kind="hist", diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9), ) - plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) - plt.close() - - # matplotlib labels - matplotlib.use("svg") # faster - ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() - y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) - # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 - ax[0].set_ylabel("instances") - if 0 < len(names) < 30: - ax[0].set_xticks(range(len(names))) - ax[0].set_xticklabels(names, rotation=90, fontsize=10) - else: - ax[0].set_xlabel("classes") - sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) - sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) - - # rectangles - labels[:, 1:3] = 0.5 # center - labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 - img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) - for cls, *box in labels[:1000]: - ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot - ax[1].imshow(img) - ax[1].axis("off") - - for a in [0, 1, 2, 3]: - for s in ["top", "right", "left", "bottom"]: - ax[a].spines[s].set_visible(False) - - plt.savefig(save_dir / "labels.jpg", dpi=200) - matplotlib.use("Agg") - plt.close() - - -def profile_idetection(start=0, stop=0, labels=(), save_dir=""): - # Plot iDetection '*.txt' per-image logs. 
from utils.plots import *; profile_idetection() - ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() - s = ["Images", "Free Storage (GB)", "RAM Usage (GB)", "Battery", "dt_raw (ms)", "dt_smooth (ms)", - "real-world FPS", ] - files = list(Path(save_dir).glob("frames*.txt")) - for fi, f in enumerate(files): - try: - results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows - n = results.shape[1] # number of rows - x = np.arange(start, min(stop, n) if stop else n) - results = results[:, x] - t = results[0] - results[0].min() # set t0=0s - results[0] = x - for i, a in enumerate(ax): - if i < len(results): - label = labels[fi] if len(labels) else f.stem.replace("frames_", "") - a.plot(t, results[i], marker=".", label=label, linewidth=1, markersize=5, ) - a.set_title(s[i]) - a.set_xlabel("time (s)") - # if fi == len(files) - 1: - # a.set_ylim(bottom=0) - for side in ["top", "right"]: - a.spines[side].set_visible(False) - else: - a.remove() - except Exception as e: - print("Warning: Plotting error for %s; %s" % (f, e)) - ax[1].legend() - plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) - - -def plot_evolve(evolve_csv="path/to/evolve.csv", ): # from utils.plots import *; plot_evolve() - # Plot evolve.csv hyp evolution results - evolve_csv = Path(evolve_csv) - data = pd.read_csv(evolve_csv) - keys = [x.strip() for x in data.columns] - x = data.values - f = fitness(x) - j = np.argmax(f) # max fitness index - plt.figure(figsize=(10, 12), tight_layout=True) - matplotlib.rc("font", **{"size": 8}) - for i, k in enumerate(keys[7:]): - v = x[:, 7 + i] - mu = v[j] # best single result - plt.subplot(6, 5, i + 1) - plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") - plt.plot(mu, f.max(), "k+", markersize=15) - plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters - if i % 5 != 0: - plt.yticks([]) - print("%15s: %.3g" % (k, mu)) - f = evolve_csv.with_suffix(".png") # filename - plt.savefig(f, dpi=200) - plt.close() - print(f"Saved {f}") - - -def plot_results(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
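# The best-epoch index computed in the loop below weights two metric columns
# 0.1/0.9, i.e. the usual YOLOv5 box fitness 0.1 * mAP@0.5 + 0.9 * mAP@0.5:0.95
# (column meanings assumed from the results.csv layout). On dummy values:
import numpy as np

vals = np.array([[0.50, 0.30],
                 [0.55, 0.33],
                 [0.54, 0.35]])  # per-epoch [mAP@0.5, mAP@0.5:0.95] (dummy)
best_epoch = np.argmax(0.1 * vals[:, 0] + 0.9 * vals[:, 1])  # -> 2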
- for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[ - j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - -def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." - for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax( - 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 0.1 * data.values[:, - 11], ) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3, ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[ - j] + f"\n{round(y[-1], 5)}") # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - import random - - # Plots one bounding box on image img - tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA, ) - - -def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): - """ - x: Features to be visualized - module_type: Module type - stage: Module stage within model - n: Maximum number of feature maps to plot - save_dir: Directory to save results - """ - 
if "Detect" not in module_type: - batch, channels, height, width = x.shape # batch, channels, height, width - if height > 1 and width > 1: - f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename - - blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels - n = min(n, channels) # number of plots - fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols - ax = ax.ravel() - plt.subplots_adjust(wspace=0.05, hspace=0.05) - for i in range(n): - ax[i].imshow(blocks[i].squeeze()) # cmap='gray' - ax[i].axis("off") - - print(f"Saving {save_dir / f}... ({n}/{channels})") - plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") - plt.close() - - -def plot_images_and_masks(images, targets, masks, paths=None, fname="images.jpg", names=None, max_size=640, - max_subplots=16, ): - # Plot image grid with labels - # print("targets:", targets.shape) - # print("masks:", masks.shape) - # print('--------------------------') - - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if isinstance(masks, torch.Tensor): - masks = masks.cpu().numpy() - masks = masks.astype(int) - - # un-normalise - if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y: block_y + h, block_x: block_x + w, :] = img - if len(targets) > 0: - idx = (targets[:, 0]).astype(int) - image_targets = targets[idx == i] - # print(targets.shape) - # print(masks.shape) - image_masks = masks[idx == i] - # mosaic_masks - # mosaic_masks[block_y:block_y + h, - # block_x:block_x + w, :] = image_masks - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype("int") - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) - - if boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) - color = colors(cls) - cls = names[cls] if names else cls - mask = image_masks[j].astype(np.bool) - # print(mask.shape) - # print(mosaic.shape) - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] = \ - mosaic[block_y: block_y + h, block_x: block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) - - # Draw image filename labels - if paths: - label = 
Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, - lineType=cv2.LINE_AA, ) - - # Image border - cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3, ) - - if fname: - r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - Image.fromarray(mosaic).save(fname) # PIL save - return mosaic - - -def plot_images_boxes_and_masks(images, targets, masks=None, paths=None, fname="images.jpg", names=None, max_size=640, - max_subplots=16, ): - if masks is not None: - return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots) - else: - return plot_images(images, targets, paths, fname, names, max_size, max_subplots) - - -def plot_masks(img, masks, colors, alpha=0.5): - """ - Args: - img (tensor): img on cuda, shape: [3, h, w], range: [0, 1] - masks (tensor): predicted masks on cuda, shape: [n, h, w] - colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - Return: - img after draw masks, shape: [h, w, 3] - - transform colors and send img_gpu to cpu for the most time. - """ - img_gpu = img.clone() - num_masks = len(masks) - # [n, 1, 1, 3] - # faster this way to transform colors - colors = torch.tensor(colors, device=img.device).float() / 255.0 - colors = colors[:, None, None, :] - # [n, h, w, 1] - masks = masks[:, :, :, None] - masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha - inv_alph_masks = masks * (-alpha) + 1 - masks_color_summand = masks_color[0] - if num_masks > 1: - inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) - masks_color_cumul = masks_color[1:] * inv_alph_cumul - masks_color_summand += masks_color_cumul.sum(dim=0) - - # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] - img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv - img_gpu = img_gpu.permute(1, 2, 0).contiguous() - # [h, w, 3] - img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand - return (img_gpu * 255).byte().cpu().numpy() diff --git a/val_instseg.py b/val_instseg.py new file mode 100644 index 000000000000..20183b6d7118 --- /dev/null +++ b/val_instseg.py @@ -0,0 +1,85 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 model accuracy on a custom dataset + +Usage: + $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 +""" + +import argparse +from evaluator import Yolov5Evaluator + +from utils.general import ( + set_logging, + print_args, + check_yaml, + check_requirements, +) + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('-d', '--data', type=str, default='data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('-w', '--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') + parser.add_argument('-b', '--batch-size', type=int, default=32, help='batch size') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--task', default='val', help='train, val, 
test, speed or study') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--verbose', action='store_true', help='report mAP by class') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') + parser.add_argument('--nosave', action='store_true', help='do not save anything.') + parser.add_argument('--project', default='runs/val', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--overlap-mask', action='store_true', help='Eval overlapping masks') + + opt = parser.parse_args() + opt.data = check_yaml(opt.data) # check YAML + opt.save_json |= opt.data.endswith('coco.yaml') + print_args(vars(opt)) + return opt + +def main(opt): + set_logging() + check_requirements(exclude=("tensorboard", "thop")) + evaluator = Yolov5Evaluator( + data=opt.data, + conf_thres=opt.conf_thres, + iou_thres=opt.iou_thres, + device=opt.device, + single_cls=opt.single_cls, + augment=opt.augment, + verbose=opt.verbose, + project=opt.project, + name=opt.name, + exist_ok=opt.exist_ok, + half=opt.half, + mask=True, + nosave=opt.nosave, + overlap=opt.overlap_mask, + ) + + if opt.task in ("train", "val", "test"): # run normally + evaluator.run( + weights=opt.weights, + batch_size=opt.batch_size, + imgsz=opt.imgsz, + save_txt=opt.save_txt, + save_conf=opt.save_conf, + save_json=opt.save_json, + task=opt.task, + ) + else: + raise ValueError(f"not support task {opt.task}") + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) From f45d6f3ffd25f1376e2af5bb5931a894cef1efe1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 20 Jul 2022 17:26:04 +0530 Subject: [PATCH 041/247] log weights after last epoch --- utils/loggers/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index a142f607561e..e1edf484e5e6 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -332,6 +332,15 @@ def on_train_end(self, plots, epoch, masks=False): self.tb.add_image( f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats="HWC" ) + if self.wandb: + best = self.save_dir/ "weights" / "best.pt" + last = self.save_dir / "weights" / "last.pt" + wandb.log_artifact(str(best if best.exists() else last), + type='model', + name=f'run_{self.wandb.run.id}_model', + aliases=['latest', 'best', 'stripped']) + self.wandb.finish_run() + def on_params_update(self): # Update hyperparams or configs of the experiment From 8f0ca0ebf338fd536ffe7ef03ded7ea21c51ffcf Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 21 Jul 2022 20:16:09 +0800 Subject: [PATCH 042/247] add detect_instseg.py --- detect_instseg.py | 278 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 detect_instseg.py diff --git a/detect_instseg.py b/detect_instseg.py new file mode 100644 
index 000000000000..2e67591fe936 --- /dev/null +++ b/detect_instseg.py @@ -0,0 +1,278 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Run inference on images, videos, directories, streams, etc. + +Usage - sources: + $ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam + img.jpg # image + vid.mp4 # video + path/ # directory + path/*.jpg # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream + +Usage - formats: + $ python path/to/detect.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s.xml # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS-only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + yolov5s_edgetpu.tflite # TensorFlow Edge TPU +""" + +import argparse +import os +import sys +from pathlib import Path + +import torch +import torch.backends.cudnn as cudnn + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from models.experimental import attempt_load +from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams +from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, + increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) +from utils.plots import Annotator, colors, save_one_box, plot_masks +from utils.torch_utils import select_device, time_sync +from utils.segment import non_max_suppression_masks, scale_masks, process_mask_upsample + + +@torch.no_grad() +def run( + weights=ROOT / 'yolov5s.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/detect', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference +): + source = str(source) + save_img = not nosave and not source.endswith('.txt') # save inference images + is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) + is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) + webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) + if is_url and is_file: + source = check_file(source) # download + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + device = select_device(device) + model = attempt_load(weights, device=device, inplace=True, fuse=True) # attempt_load accepts a str or a list of weights + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if half else model.float() + pt = True + imgsz = check_img_size(imgsz, s=stride) # check image size + + # Dataloader + if webcam: + view_img = check_imshow() + cudnn.benchmark = True # set True to speed up constant image size inference + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) + bs = len(dataset) # batch_size + else: + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + bs = 1 # batch_size + vid_path, vid_writer = [None] * bs, [None] * bs + + # Run inference + if device != "cpu": + im = torch.zeros(1, 3, *imgsz).to(device).half() # input image + model(im) # warmup + seen, windows, dt = 0, [], [0.0, 0.0, 0.0] + for path, im, im0s, vid_cap, s in dataset: + t1 = time_sync() + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim + t2 = time_sync() + dt[0] += t2 - t1 + + # Inference + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred, out = model(im, augment=augment, visualize=visualize) + proto = out[1] + t3 = time_sync() + dt[1] += t3 - t2 + + # NMS + pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + dt[2] += time_sync() - t3 + + # Second-stage classifier (optional) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
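# NOTE: each det row returned by non_max_suppression_masks is laid out as
# [x1, y1, x2, y2, conf, cls, c1, ..., c32]: the usual six detection columns
# followed by that detection's 32 mask coefficients. As in dbolya/yolact, every
# instance mask is decoded as a linear combination of shared prototype masks,
# roughly (shapes illustrative; the training loss uses the channels-last form (80, 80, 32)):
#   pred_mask[y, x] = sigmoid(sum_k coeffs[k] * proto[y, x, k]) > threshold
# upsampled to the network input size and cropped to the predicted box, which is
# what process_mask_upsample does below before det is trimmed back to det[:, :6].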
+ + # Process predictions + for i, det in enumerate(pred): # per image + seen += 1 + if webcam: # batch_size >= 1 + p, im0, frame = path[i], im0s[i].copy(), dataset.count + s += f'{i}: ' + else: + p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) + + p = Path(p) # to Path + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt + s += '%gx%g ' % im.shape[2:] # print string + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + if len(det): + # mask stuff + masks_conf = det[:, 6:] + # binary mask, (img_h, img_w, n) + masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) + # n, img_h, img_w + masks = masks.permute(2, 0, 1).contiguous() + # bbox stuff + det = det[:, :6] # drop the mask coefficients from outputs, keep [xyxy, conf, cls] + # Rescale boxes from img_size to im0 size + det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() + + # Print results + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + + # plot masks + mcolors = [colors(int(cls)) for cls in det[:, 5]] + # NOTE: this way to draw masks is faster, + # but the image might get blurred, + # from https://github.com/dbolya/yolact + # image with masks, (img_h, img_w, 3) + img_masks = plot_masks(im[i], masks, mcolors) + # scale image to original hw + img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) + annotator.im = img_masks + + # Write results + for *xyxy, conf, cls in reversed(det): + if save_txt: # Write to file + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(f'{txt_path}.txt', 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + if save_img or save_crop or view_img: # Add bbox to image + c = int(cls) # integer class + label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') + annotator.box_label(xyxy, label, color=colors(c, True)) + if save_crop: + save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) + + # Stream results + im0 = annotator.result() + if view_img: + if p not in windows: + windows.append(p) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) + cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) + cv2.imshow(str(p), im0) + cv2.waitKey(1) # 1 millisecond + + # Save results (image with detections) + if save_img: + if dataset.mode == 'image': + cv2.imwrite(save_path, im0) + else: # 'video' or 'stream' + if vid_path[i] != save_path: # new video + vid_path[i] = save_path + if isinstance(vid_writer[i], cv2.VideoWriter): + vid_writer[i].release() # release previous video writer + if vid_cap: # video + fps = vid_cap.get(cv2.CAP_PROP_FPS) + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + else: # stream + fps, w, h = 30, im0.shape[1], im0.shape[0] + save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos + vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer[i].write(im0) + + # Print time (inference-only) + LOGGER.info(f'{s}Done.
({t3 - t2:.3f}s)') + + # Print results + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + if update: + strip_optimizer(weights) # update model (to fix SourceChangeWarning) + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') + parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', action='store_true', help='show results') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') + parser.add_argument('--nosave', action='store_true', help='do not save images/videos') + parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') + parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--visualize', action='store_true', help='visualize features') + parser.add_argument('--update', action='store_true', help='update all models') + parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') + parser.add_argument('--name', default='exp', help='save results to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') + parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') + parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(exclude=('tensorboard', 'thop')) + run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) From 7fae119284224ba59885a6ab576f3f2962c75d5a Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 21 Jul 2022 18:07:07 +0530 Subject: [PATCH 043/247] fix check for device --- detect_instseg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detect_instseg.py b/detect_instseg.py index 2e67591fe936..34fab4372db4 100644 --- a/detect_instseg.py +++ b/detect_instseg.py @@ -88,6 +88,7 @@ def 
run( # Load model device = select_device(device) + import pdb;pdb.set_trace() model = attempt_load(weights, device=device, inplace=True, fuse=True) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names @@ -107,7 +108,7 @@ def run( vid_path, vid_writer = [None] * bs, [None] * bs # Run inference - if device != "cpu": + if str(device) != "cpu": im = torch.zeros(1, 3, *imgsz).to(device).half() # input image model(im) # warmup seen, windows, dt = 0, [], [0.0, 0.0, 0.0]

From c307b45de45111e5e0dcd6849f8cb729558d9b7d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 21 Jul 2022 18:07:42 +0530 Subject: [PATCH 044/247] remove pdb import --- detect_instseg.py | 1 - 1 file changed, 1 deletion(-)

diff --git a/detect_instseg.py b/detect_instseg.py index 34fab4372db4..a703f75d486b 100644 --- a/detect_instseg.py +++ b/detect_instseg.py @@ -88,7 +88,6 @@ def run( # Load model device = select_device(device) - import pdb;pdb.set_trace() model = attempt_load(weights, device=device, inplace=True, fuse=True) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names

From fd6bfbbcb589806eb047528cf6a2887c507e50b3 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 22 Jul 2022 18:43:26 +0530 Subject: [PATCH 045/247] finish run --- utils/loggers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index e1edf484e5e6..d04d42ef6f3e 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -339,7 +339,7 @@ def on_train_end(self, plots, epoch, masks=False): type='model', name=f'run_{self.wandb.run.id}_model', aliases=['latest', 'best', 'stripped']) - self.wandb.finish_run() + self.wandb.finish() def on_params_update(self):

From 29e433be3795dbefc2dc6822e0b49558746a7bb2 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 3 Aug 2022 05:38:18 +0000 Subject: [PATCH 046/247] update bias init&&update obj loss --- evaluator.py | 8 ++++---- models/yolo.py | 2 +- train_instseg.py | 2 ++ utils/seg_loss.py | 18 +++++++++++++----- 4 files changed, 20 insertions(+), 10 deletions(-)
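With the mask branch added, the per-anchor prediction vector is laid out as [x, y, w, h, obj, m1 ... m_mask_dim, cls1 ... cls_nc], so the class-prior bias initialization has to start at channel 5 + mask_dim instead of 5, and the objectness target is rebalanced to take half its value from box IoU and half from mask IoU. A minimal sketch of the arithmetic behind the two changes (nc and mask_dim values illustrative):

    import math

    nc, mask_dim = 80, 32
    cls_start = 5 + mask_dim                 # [box(4), obj(1), coeffs(mask_dim), classes(nc)]
    cls_prior = math.log(0.6 / (nc - 0.99))  # same ~0.6/nc class prior as stock YOLOv5
    # objectness target of a matched anchor (see the seg_loss hunks below):
    # tobj = 0.5 * ((1 - gr) + gr * box_iou) + 0.5 * mask_iou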
diff --git a/evaluator.py b/evaluator.py index 27533c3048f1..74096e3f5b32 100644 --- a/evaluator.py +++ b/evaluator.py @@ -16,18 +16,18 @@ import torch import torch.nn.functional as F from PIL import Image -# import pycocotools.mask as mask_util +import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load from seg_dataloaders import create_dataloader from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) -from utils.general import (check_dataset, check_img_size, check_suffix, ) +from utils.general import (check_dataset, check_img_size, check_suffix) from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) from utils.plots import output_to_target, plot_images_boxes_and_masks from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) -from utils.torch_utils import select_device, time_sync +from utils.torch_utils import select_device, time_sync, de_parallel def save_one_txt(predn, save_conf, shape, file): @@ -304,7 +304,7 @@ def inference(self, model, img, targets, masks=None, compute_loss=None): targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels t3 = time_sync() out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, - agnostic=self.single_cls, ) + agnostic=self.single_cls, mask_dim=de_parallel(model).model[-1].mask_dim) self.dt[2] += time_sync() - t3 return out, train_out

diff --git a/models/yolo.py b/models/yolo.py index 786120b4902a..1d46726cf502 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -276,7 +276,7 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self):

diff --git a/train_instseg.py b/train_instseg.py index b1ea72ff5757..5f98ff839ba6 100644 --- a/train_instseg.py +++ b/train_instseg.py @@ -177,6 +177,8 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) g[0].append(v.weight) + # hyp['lr0'] = hyp['lr0'] / batch_size * 128 + # hyp['warmup_bias_lr'] = 0.01 if opt.optimizer == 'Adam': optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum elif opt.optimizer == 'AdamW':

diff --git a/utils/seg_loss.py b/utils/seg_loss.py index e5294a5300f7..94eebc7a0e5f 100644 --- a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -133,7 +133,7 @@ def loss_segment(self, preds, targets, masks): if self.sort_obj_iou: sort_id = torch.argsort(score_iou) b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio + tobj[b, a, gj, gi] = 0.5 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) @@ -170,7 +170,13 @@ def loss_segment(self, preds, targets, masks): psi = ps[index][:, 5: self.nm] proto = proto_out[bi] - batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + one_lseg, iou = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + batch_lseg += one_lseg + + # update tobj + iou = iou.detach().clamp(0).type(tobj.dtype) + tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] + lseg += batch_lseg / len(b.unique()) obji = self.BCEobj(pi[..., 4], tobj) @@ -193,10 +199,12 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): """mask loss of single pic.""" # (80, 80, 32) @ (32, n) -> (80, 80, n) pred_mask = proto @ pred.tanh().T + # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) + iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean() + return lseg.mean(), iou  # + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
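# NOTE: single_mask_loss above mirrors inference-time mask decoding inside the
# loss (shapes as in the comments; illustrative only):
#   proto: (80, 80, 32), psi: (k, 32) coefficients of the k matched anchors
#   pred_mask = proto @ psi.tanh().T                  -> (80, 80, k) mask logits
#   lseg = BCE_with_logits(pred_mask, gt_mask)        # elementwise, reduction="none"
#   lseg = crop(lseg, xyxy).mean(dim=(0, 1)) / w / h  # only pixels inside the GT box
#                                                     # count, normalized by box area
# and the mask IoU computed with return_iou=True feeds the 0.5 * iou objectness
# term added in loss_segment above.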
@@ -334,7 +342,7 @@ class MaskIOULoss(nn.Module): def __init__(self) -> None: super().__init__() - def forward(self, pred_mask, gt_mask, mxyxy=None): + def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): """ Args: pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) @@ -349,7 +357,7 @@ def forward(self, pred_mask, gt_mask, mxyxy=None): pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) iou = masks_iou(pred_mask, gt_mask) - return 1.0 - iou + return iou if return_iou else (1.0 - iou) import math

From 005f8cd390026b40136722b6a06cc04d3177bc88 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 3 Aug 2022 14:21:13 +0530 Subject: [PATCH 047/247] log at correct steps --- evaluator.py | 4 +++- utils/loggers/__init__.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/evaluator.py b/evaluator.py index 74096e3f5b32..1db1ff28c7d0 100644 --- a/evaluator.py +++ b/evaluator.py @@ -94,6 +94,7 @@ def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls= "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % ( "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",)) + self.step = 0 # coco stuff self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith( @@ -163,6 +164,7 @@ def run_training(self, model, dataloader, compute_loss=None): # Return results model.float() # for training + self.step += 1 return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), self.metric.get_maps(self.nc), t,) @@ -493,7 +495,7 @@ def plot_images(self, i, img, targets, masks, out, paths): # daemon=True, ).start() import wandb if wandb.run: - wandb.log({f"pred_{i}": wandb.Image(str(f))}) + wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) def nms(self, **kwargs): return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs))

diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 14a5009ac880..92328a6b1403 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -404,8 +404,6 @@ def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots, sync if ni < 3: f = self.save_dir / f"train_batch{ni}.jpg" # filename plot_images_and_masks(imgs, targets, masks, paths, f) - if self.wandb: - wandb.log({"train_labels": wandb.Image(str(f))}) @@ -427,4 +425,4 @@ def on_fit_epoch_end(self, vals, epoch): for k, v in x.items(): self.tb.add_scalar(k, v, epoch) if self.wandb: - wandb.log(x) + wandb.log(x, step=epoch)

From a5cfa79f60702d716850cbde660b45676c86b672 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 3 Aug 2022 15:00:32 +0530 Subject: [PATCH 048/247] update logger step --- utils/loggers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 92328a6b1403..8e670a86b1b1 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -425,4 +425,4 @@ def on_fit_epoch_end(self, vals, epoch): for k, v in x.items(): self.tb.add_scalar(k, v, epoch) if self.wandb: - wandb.log(x, step=epoch) + wandb.log(x, step=epoch, commit=True)

From b89a2d65ef72e5042ad66cd0bb9419b2094a3840 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 3 Aug 2022 20:47:59 +0530 Subject: [PATCH 049/247] make compatible with torch 1.12 --- utils/seg_loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/utils/seg_loss.py b/utils/seg_loss.py index 94eebc7a0e5f..e0618f831e63 100644 ---
a/utils/seg_loss.py +++ b/utils/seg_loss.py @@ -294,8 +294,8 @@ def build_targets_for_masks(self, p, targets): ], device=targets.device, ).float() * g) # offsets for i in range(self.nl): - anchors = self.anchors[i] - gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain @@ -328,7 +328,7 @@ def build_targets_for_masks(self, p, targets): # Append a = t[:, 6].long() # anchor indices tidx = t[:, 7].long() - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class From 146d96869cf218122fc41ba5ff7b6c006901fc1a Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Fri, 5 Aug 2022 15:59:55 +0530 Subject: [PATCH 050/247] update --- models/yolo.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index 1d46726cf502..885a1d7574c8 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -14,6 +14,8 @@ from copy import deepcopy from pathlib import Path +from torch import NoneType + FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: @@ -28,6 +30,7 @@ from utils.plots import feature_visualization from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync) +import torch.nn.functional as F try: import thop # for FLOPs computation @@ -108,7 +111,7 @@ def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inp # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), # nn.SiLU(inplace=True), # nn.Upsample(scale_factor=2, mode='nearest'), - nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), + Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), nn.SiLU(inplace=True), nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), @@ -376,6 +379,18 @@ def parse_model(d, ch): # model_dict, input_channels(3) ch.append(c2) return nn.Sequential(*layers), sorted(save) +class Upsample(nn.Module): + ''' + deterministic upsample layer + ''' + def __init__(self, scale_factor, mode="bilinear", align_corners=False) -> None: + super().__init__() + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners) if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -409,4 +424,4 @@ def parse_model(d, ch): # model_dict, input_channels(3) print(f'Error in {cfg}: {e}') else: # report fused model summary - model.fuse() + model.fuse() \ No newline at end of file From 3cbaa348e23e879d3cef8da21cf9fc5449479e55 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 10 Aug 2022 18:06:33 +0800 Subject: [PATCH 051/247] clean up --- models/yolo.py | 7 +- requirements.txt | 1 + seg_augmentations.py | 287 ------ seg_dataloaders.py | 1094 ---------------------- detect_instseg.py => segment/detect.py | 6 +- evaluator.py => segment/evaluator.py | 13 +- train_instseg.py => segment/train.py | 12 +- val_instseg.py => 
segment/val.py | 0 segment/val_new.py | 459 +++++++++ utils/dataloaders.py | 1 + utils/general.py | 0 utils/metrics.py | 12 +- utils/seg_metrics.py | 361 ------- utils/segment/__init__.py | 0 utils/segment/augmentations.py | 114 +++ utils/segment/dataloaders.py | 305 ++++++ utils/{segment.py => segment/general.py} | 30 +- utils/{seg_loss.py => segment/loss.py} | 230 +---- utils/segment/metrics.py | 149 +++ 19 files changed, 1089 insertions(+), 1992 deletions(-) delete mode 100644 seg_augmentations.py delete mode 100644 seg_dataloaders.py rename detect_instseg.py => segment/detect.py (98%) rename evaluator.py => segment/evaluator.py (98%) rename train_instseg.py => segment/train.py (98%) rename val_instseg.py => segment/val.py (100%) create mode 100644 segment/val_new.py mode change 100755 => 100644 utils/dataloaders.py mode change 100755 => 100644 utils/general.py delete mode 100644 utils/seg_metrics.py create mode 100644 utils/segment/__init__.py create mode 100644 utils/segment/augmentations.py create mode 100644 utils/segment/dataloaders.py rename utils/{segment.py => segment/general.py} (89%) rename utils/{seg_loss.py => segment/loss.py} (55%) create mode 100644 utils/segment/metrics.py diff --git a/models/yolo.py b/models/yolo.py index 885a1d7574c8..cd9248e7c8c2 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -279,7 +279,10 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + if hasattr(m, "mask_dim"): + b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + else: + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self): @@ -424,4 +427,4 @@ def forward(self, x): print(f'Error in {cfg}: {e}') else: # report fused model summary - model.fuse() \ No newline at end of file + model.fuse() diff --git a/requirements.txt b/requirements.txt index 6313cecee578..8e5720ac50aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ scipy>=1.4.1 torch>=1.7.0 torchvision>=0.8.1 tqdm>=4.64.0 +easydict>=1.9 protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012 # Logging ------------------------------------- diff --git a/seg_augmentations.py b/seg_augmentations.py deleted file mode 100644 index 409e021772b3..000000000000 --- a/seg_augmentations.py +++ /dev/null @@ -1,287 +0,0 @@ -# TODO: Move to utils, merge with augmentations.py - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Image augmentation functions -""" - -import logging -import math -import random - -import cv2 -import numpy as np - -from utils.general import colorstr, check_version -from utils.seg_metrics import bbox_ioa -from utils.segment import segment2box, resample_segments - - -class Albumentations: - # YOLOv5 Albumentations class (optional, only used if package is installed) - def __init__(self): - self.transform = None - try: - import albumentations as A - - check_version(A.__version__, "1.0.3") # version requirement - - self.transform = A.Compose([A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01), - A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), A.ImageCompression(quality_lower=75, 
p=0.0), ], - bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]), ) - - logging.info(colorstr("albumentations: ") + ", ".join(f"{x}" for x in self.transform.transforms if x.p)) - except ImportError: # package not installed, skip - pass - except Exception as e: - logging.info(colorstr("albumentations: ") + f"{e}") - - def __call__(self, im, labels, p=1.0): - if self.transform and random.random() < p: - new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed - im, labels = new["image"], np.array([[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])]) - return im, labels - - -def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): - # HSV color-space augmentation - if hgain or sgain or vgain: - r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains - hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) - dtype = im.dtype # uint8 - - x = np.arange(0, 256, dtype=r.dtype) - lut_hue = ((x * r[0]) % 180).astype(dtype) - lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) - lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - - im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) - cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed - - -def hist_equalize(im, clahe=True, bgr=False): - # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 - yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) - if clahe: - c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) - yuv[:, :, 0] = c.apply(yuv[:, :, 0]) - else: - yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram - return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB - - -def replicate(im, labels): - # Replicate labels - h, w = im.shape[:2] - boxes = labels[:, 1:].astype(int) - x1, y1, x2, y2 = boxes.T - s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) - for i in s.argsort()[: round(s.size * 0.5)]: # smallest indices - x1b, y1b, x2b, y2b = boxes[i] - bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y - x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] - im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] - labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) - - return im, labels - - -def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32, - center=True, # center padding or left top padding -): - # Resize and pad image while meeting stride-multiple constraints - shape = im.shape[:2] # current shape [height, width] - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not scaleup: # only scale down, do not scale up (for better val mAP) - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - if auto: # minimum rectangle - dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding - elif scaleFill: # stretch - dw, dh = 0.0, 0.0 - new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios - - if center: - dw /= 2 # divide padding into 2 sides - dh /= 2 - - if shape[::-1] != new_unpad: # resize - im 
= cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)) if center else 0, int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)) if center else 0, int(round(dw + 0.1)) - im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border - return im, ratio, (dw, dh) - - -def random_perspective(im, targets=(), segments=(), degrees=10, translate=0.1, scale=0.1, shear=10, perspective=0.0, - border=(0, 0), area_thr=0.2, return_seg=False, ): - # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) - # targets = [cls, xyxy] - - height = im.shape[0] + border[0] * 2 # shape(h,w,c) - width = im.shape[1] + border[1] * 2 - - # Center - C = np.eye(3) - C[0, 2] = -im.shape[1] / 2 # x translation (pixels) - C[1, 2] = -im.shape[0] / 2 # y translation (pixels) - - # Perspective - P = np.eye(3) - P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) - P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) - - # Rotation and Scale - R = np.eye(3) - a = random.uniform(-degrees, degrees) - # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations - s = random.uniform(1 - scale, 1 + scale) - # s = 2 ** random.uniform(-scale, scale) - R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) - - # Shear - S = np.eye(3) - S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) - - # Translation - T = np.eye(3) - T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) - T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) - - # Combined rotation matrix - M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT - if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed - if perspective: - im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) - else: # affine - im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) - - # Visualize - # import matplotlib.pyplot as plt - # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() - # ax[0].imshow(im[:, :, ::-1]) # base - # ax[1].imshow(im2[:, :, ::-1]) # warped - - # Transform label coordinates - n = len(targets) - new_segments = [] - if n: - use_segments = any(x.any() for x in segments) - new = np.zeros((n, 4)) - if use_segments: # warp segments - segments = resample_segments(segments) # upsample - for i, segment in enumerate(segments): - xy = np.ones((len(segment), 3)) - xy[:, :2] = segment - xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine - - # clip - new[i] = segment2box(xy, width, height) - new_segments.append(xy) - - else: # warp boxes - xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine - - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) - - # clip - new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) - new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) - - # filter candidates - i = 
box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, cls=targets[:, 0], - # area_thr=0.01 if use_segments else 0.10, - area_thr=area_thr, ) - targets = targets[i] - targets[:, 1:5] = new[i] - new_segments = (np.array(new_segments)[i] if len(new_segments) else np.array(new_segments)) - - return (im, targets, new_segments) if return_seg else (im, targets) - - -def copy_paste(im, labels, segments, p=0.5): - # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) - n = len(segments) - if p and n: - h, w, c = im.shape # height, width, channels - im_new = np.zeros(im.shape, np.uint8) - for j in random.sample(range(n), k=round(p * n)): - l, s = labels[j], segments[j] - box = w - l[3], l[2], w - l[1], l[4] - ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area - if (ioa < 0.30).all(): # allow 30% obscuration of existing labels - labels = np.concatenate((labels, [[l[0], *box]]), 0) - segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) - cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED, ) - - result = cv2.bitwise_and(src1=im, src2=im_new) - result = cv2.flip(result, 1) # augment segments (flip left-right) - i = result > 0 # pixels to replace - # i[:, :] = result.max(2).reshape(h, w, 1) # act over ch - im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug - - return im, labels, segments - - -def cutout(im, labels, p=0.5): - # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 - if random.random() < p: - h, w = im.shape[:2] - scales = ([0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16) # image size fraction - for s in scales: - mask_h = random.randint(1, int(h * s)) # create random masks - mask_w = random.randint(1, int(w * s)) - - # box - xmin = max(0, random.randint(0, w) - mask_w // 2) - ymin = max(0, random.randint(0, h) - mask_h // 2) - xmax = min(w, xmin + mask_w) - ymax = min(h, ymin + mask_h) - - # apply random color mask - im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] - - # return unobscured labels - if len(labels) and s > 0.03: - box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) - ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area - labels = labels[ioa < 0.60] # remove >60% obscured labels - - return labels - - -def mixup(im, labels, im2, labels2): - # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf - r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 - im = (im * r + im2 * (1 - r)).astype(np.uint8) - labels = np.concatenate((labels, labels2), 0) - return im, labels - - -def box_candidates(box1, box2, cls, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) - # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio - w1, h1 = box1[2] - box1[0], box1[3] - box1[1] - w2, h2 = box2[2] - box2[0], box2[3] - box2[1] - area_thr = (np.array(area_thr)[cls.astype(np.int)] if isinstance(area_thr, list) else area_thr) - if isinstance(area_thr, list) and len(area_thr) == 1: - area_thr = area_thr[0] - ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio - return ((w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)) # candidates diff --git a/seg_dataloaders.py b/seg_dataloaders.py deleted file mode 100644 index 4d74bb00c1a9..000000000000 --- a/seg_dataloaders.py +++ /dev/null @@ -1,1094 +0,0 @@ -## TODO: Move to utils, merge with dataloaders.py - -# YOLOv5 🚀 by Ultralytics, 
GPL-3.0 license -""" -Dataloaders -""" - -import json -import logging -import time -import numpy as np -from functools import wraps -from itertools import repeat -from multiprocessing.pool import ThreadPool, Pool -from pathlib import Path -from zipfile import ZipFile -from PIL import Image -from tqdm import tqdm - -import torch.nn.functional as F -import yaml -from torch.utils.data import Dataset as torchDataset -from torch.utils.data import distributed -from torch.utils.data.sampler import BatchSampler as torchBatchSampler -from torch.utils.data.sampler import RandomSampler - -from seg_augmentations import (Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective, ) -from utils.general import colorstr, check_dataset, check_yaml, xywhn2xyxy, xyxy2xywhn, xyn2xy -from utils.torch_utils import torch_distributed_zero_first - - -class _RepeatSampler: - """ Sampler that repeats forever - - Args: - sampler (Sampler) - """ - - def __init__(self, sampler): - self.sampler = sampler - - def __iter__(self): - while True: - yield from iter(self.sampler) - - -class YoloBatchSampler(torchBatchSampler): - """ - This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler. - It works just like the :class:`torch.utils.data.sampler.BatchSampler`, - but it will turn on/off the mosaic aug. - """ - - def __init__(self, *args, augment=True, **kwargs): - super().__init__(*args, **kwargs) - self.augment = augment - - def __iter__(self): - for batch in super().__iter__(): - yield [(self.augment, idx) for idx in batch] - - -def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, - rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix="", shuffle=False, - area_thr=0.2, mask_head=False, mask_downsample_ratio=1, overlap_mask=False): - if rect and shuffle: - print("WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False") - shuffle = False - data_load = LoadImagesAndLabelsAndMasks if mask_head else LoadImagesAndLabels - # Make sure only the first process in DDP process the dataset first, and the following others can use the cache - with torch_distributed_zero_first(rank): - dataset = data_load(path, imgsz, batch_size, augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, - prefix=prefix, area_thr=area_thr, ) - if mask_head: - dataset.downsample_ratio = mask_downsample_ratio - dataset.overlap = overlap_mask - - batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, workers]) # number of workers - # sampler = InfiniteSampler(len(dataset), seed=0) - sampler = (distributed.DistributedSampler(dataset, shuffle=shuffle) if rank != -1 else RandomSampler(dataset)) - - batch_sampler = (YoloBatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False, - augment=augment, ) if not rect else None) - dataloader = DataLoader(dataset, num_workers=nw, batch_size=1 if batch_sampler is not None else batch_size, - # batch-size and batch-sampler is exclusion - batch_sampler=batch_sampler, pin_memory=True, - collate_fn=data_load.collate_fn4 if quad else data_load.collate_fn, - ) - return dataloader, dataset - - -class Dataset(torchDataset): - """This class is a subclass of the base :class:`torch.utils.data.Dataset`, - that enables on the fly resizing of the 
``input_dim``. - - Args: - input_dimension (tuple): (width,height) tuple with default dimensions of the network - """ - - def __init__(self, augment=True): - super().__init__() - self.augment = augment - - @staticmethod - def mosaic_getitem(getitem_fn): - """ - Decorator method that needs to be used around the ``__getitem__`` method. |br| - This decorator enables the closing mosaic - - Example: - >>> class CustomSet(ln.data.Dataset): - ... def __len__(self): - ... return 10 - ... @ln.data.Dataset.mosaic_getitem - ... def __getitem__(self, index): - ... return self.enable_mosaic - """ - - @wraps(getitem_fn) - def wrapper(self, index): - if not isinstance(index, int): - self.augment = index[0] - index = index[1] - - ret_val = getitem_fn(self, index) - - return ret_val - - return wrapper - - -class LoadImagesAndLabels(Dataset): - # YOLOv5 train_loader/val_loader, loads images and labels for training and validation - cache_version = 0.6 # dataset labels *.cache version - - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix="", area_thr=0.2, ): - super().__init__(augment=augment) - self.img_size = img_size - self.hyp = hyp - self.image_weights = image_weights - self.rect = False if image_weights else rect - self.mosaic = (self.augment and not self.rect) # load 4 images at a time into a mosaic (only during training) - self.mosaic_border = [-img_size // 2, -img_size // 2] - self.stride = stride - self.path = path - self.albumentations = Albumentations() if augment else None - - # additional feature - self.area_thr = area_thr - - p = Path(path) # os-agnostic - self.img_files = self.get_img_files(p, prefix) - self.label_files = img2label_paths(self.img_files) # labels - # Check cache - cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache") - labels, shapes, segments, img_files, label_files = self.load_cache(cache_path, prefix) - - self.segments = segments - self.labels = list(labels) - self.shapes = np.array(shapes, dtype=np.float64) - self.img_files = img_files # update - self.label_files = label_files # update - - num_imgs = len(shapes) # number of images - batch_index = np.floor(np.arange(num_imgs) / batch_size).astype(np.int) # batch index - self.batch_index = batch_index # batch index of image - self.num_imgs = num_imgs - self.indices = range(num_imgs) - - # Update labels - for i, (_, segment) in enumerate(zip(self.labels, self.segments)): - if single_cls: # single-class training, merge all classes into 0 - self.labels[i][:, 0] = 0 - if segment: - self.segments[i][:, 0] = 0 - - # Rectangular Training - if self.rect: - num_batches = batch_index[-1] + 1 # number of batches - self.update_rect(num_batches, pad) - - # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) - self.imgs, self.img_npy = [None] * num_imgs, [None] * num_imgs - if cache_images: - self.cache_images(cache_images, prefix) - - def cache_images(self, cache_images, prefix): - """Cache images to disk or ram for faster speed.""" - if cache_images == "disk": - self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + "_npy") - self.img_npy = [self.im_cache_dir / Path(f).with_suffix(".npy").name for f in self.img_files] - self.im_cache_dir.mkdir(parents=True, exist_ok=True) - gb = 0 # Gigabytes of cached images - self.img_hw0, self.img_hw = [None] * self.num_imgs, [None] * self.num_imgs - results = 
ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(self.num_imgs))) - pbar = tqdm(enumerate(results), total=self.num_imgs) - for i, x in pbar: - if cache_images == "disk": - if not self.img_npy[i].exists(): - np.save(self.img_npy[i].as_posix(), x[0]) - gb += self.img_npy[i].stat().st_size - else: - (self.imgs[i], self.img_hw0[i], self.img_hw[i],) = x # im, hw_orig, hw_resized = load_image(self, i) - gb += self.imgs[i].nbytes - pbar.desc = f"{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})" - pbar.close() - - def get_img_files(self, p, prefix): - """Read image files.""" - try: - f = [] # image files - if p.is_dir(): # dir - f += glob.glob(str(p / "**" / "*.*"), recursive=True) # f = list(p.rglob('*.*')) # pathlib - elif p.is_file(): # file - with open(p, "r") as t: - t = t.read().strip().splitlines() - parent = str(p.parent) + os.sep - f += [x.replace("./", parent) if x.startswith("./") else x for x in - t] # local to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) - else: - raise Exception(f"{prefix}{p} does not exist") - img_files = sorted([x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS]) - # img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib - assert img_files, f"{prefix}No images found" - except Exception as e: - raise Exception(f"{prefix}Error loading data from {str(p)}: {e}\nSee {HELP_URL}") - return img_files - - def load_cache(self, cache_path, prefix): - """Load labels from *.cache file.""" - try: - cache, exists = (np.load(cache_path, allow_pickle=True).item(), True,) # load dict - assert cache["version"] == self.cache_version # same version - assert cache["hash"] == get_hash(self.label_files + self.img_files) # same hash - except: - cache, exists = self.cache_labels(cache_path, prefix), False # cache - - # Display cache - nf, nm, ne, nc, n = cache.pop("results") # found, missing, empty, corrupted, total - if exists: - d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" - tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results - if cache["msgs"]: - logging.info("\n".join(cache["msgs"])) # display warnings - assert ( - nf > 0 or not self.augment), f"{prefix}No labels in {cache_path}. Can not train without labels. 
See {HELP_URL}" - - # Read cache - [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items - labels, shapes, segments = zip(*cache.values()) - img_files = list(cache.keys()) # update - label_files = img2label_paths(cache.keys()) # update - return labels, shapes, segments, img_files, label_files - - def update_rect(self, num_batches, pad): - """Update attr if rect is True.""" - # Sort by aspect ratio - s = self.shapes # wh - ar = s[:, 1] / s[:, 0] # aspect ratio - irect = ar.argsort() - self.img_files = [self.img_files[i] for i in irect] - self.label_files = [self.label_files[i] for i in irect] - self.labels = [self.labels[i] for i in irect] - self.segments = [self.segments[i] for i in irect] - self.shapes = s[irect] # wh - ar = ar[irect] - - # Set training image shapes - shapes = [[1, 1]] * num_batches - for i in range(num_batches): - ari = ar[self.batch_index == i] - mini, maxi = ari.min(), ari.max() - if maxi < 1: - shapes[i] = [maxi, 1] - elif mini > 1: - shapes[i] = [1, 1 / mini] - - self.batch_shapes = (np.ceil(np.array(shapes) * self.img_size / self.stride + pad).astype(np.int) * self.stride) - - def cache_labels(self, path=Path("./labels.cache"), prefix=""): - """Cache labels to *.cache file if there is no *.cache file in local.""" - # Cache dataset labels, check images and read shapes - x = {} # dict - nm, nf, ne, nc, msgs = (0, 0, 0, 0, [],) # number missing, found, empty, corrupt, messages - desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." - - with Pool(NUM_THREADS) as pool: - pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix)), ), - desc=desc, total=len(self.img_files), ) - for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: - nm += nm_f - nf += nf_f - ne += ne_f - nc += nc_f - if im_file: - x[im_file] = [l, shape, segments] - if msg: - msgs.append(msg) - pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted" - - pbar.close() - if msgs: - logging.info("\n".join(msgs)) - if nf == 0: - logging.info(f"{prefix}WARNING: No labels found in {path}. 
See {HELP_URL}") - x["hash"] = get_hash(self.label_files + self.img_files) - x["results"] = nf, nm, ne, nc, len(self.img_files) - x["msgs"] = msgs # warnings - x["version"] = self.cache_version # cache version - try: - np.save(path, x) # save cache for next time - path.with_suffix(".cache.npy").rename(path) # remove .npy suffix - logging.info(f"{prefix}New cache created: {path}") - except Exception as e: - logging.info(f"{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}") # path not writeable - return x - - def __len__(self): - return len(self.img_files) - - # def __iter__(self): - # self.count = -1 - # print('ran dataset iter') - # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) - # return self - - @Dataset.mosaic_getitem - def __getitem__(self, index): - index = self.indices[index] # linear, shuffled, or image_weights - - hyp = self.hyp - self.mosaic = self.augment and not self.rect - mosaic = self.mosaic and random.random() < hyp["mosaic"] - if mosaic: - # Load mosaic - img, labels = load_mosaic(self, index) - shapes = None - - # MixUp augmentation - if random.random() < hyp["mixup"]: - img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) - - else: - # Load image - img, (h0, w0), (h, w) = load_image(self, index) - - # Letterbox - shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - labels = self.labels[index].copy() - if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) - - if self.augment: - img, labels = random_perspective(img, labels, degrees=hyp["degrees"], translate=hyp["translate"], - scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], ) - - nl = len(labels) # number of labels - if nl: - labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) - - if self.augment: - # Albumentations - img, labels = self.albumentations(img, labels) - nl = len(labels) # update after albumentations - - # HSV color-space - augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) - - # Flip up-down - if random.random() < hyp["flipud"]: - img = np.flipud(img) - if nl: - labels[:, 2] = 1 - labels[:, 2] - - # Flip left-right - if random.random() < hyp["fliplr"]: - img = np.fliplr(img) - if nl: - labels[:, 1] = 1 - labels[:, 1] - - # Cutouts # labels = cutout(img, labels, p=0.5) - - labels_out = torch.zeros((nl, 6)) - if nl: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img = np.ascontiguousarray(img) - - return torch.from_numpy(img), labels_out, self.img_files[index], shapes - - @staticmethod - def collate_fn(batch): - img, label, path, shapes = zip(*batch) # transposed - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() - return torch.stack(img, 0), torch.cat(label, 0), path, shapes, None - - @staticmethod - def collate_fn4(batch): - img, label, path, shapes = zip(*batch) # transposed - n = len(shapes) // 4 - img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] - - ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) - wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) - s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 
0.5]]) # scale - for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW - i *= 4 - if random.random() < 0.5: - im = \ - F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode="bilinear", align_corners=False, )[ - 0].type(img[i].type()) - l = label[i] - else: - im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1),), 2, ) - l = (torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo,), 0, ) * s) - img4.append(im) - label4.append(l) - - for i, l in enumerate(label4): - l[:, 0] = i # add target image index for build_targets() - - return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4 - - -class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0, prefix="", area_thr=0.2, - downsample_ratio=1, overlap=False, - ): - super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, - stride, pad, prefix, area_thr, ) - self.downsample_ratio = downsample_ratio - self.overlap = overlap - - @Dataset.mosaic_getitem - def __getitem__(self, index): - index = self.indices[index] # linear, shuffled, or image_weights - - hyp = self.hyp - self.mosaic = self.augment and not self.rect - mosaic = self.mosaic and random.random() < hyp["mosaic"] - masks = [] - if mosaic: - # Load mosaic - img, labels, segments = load_mosaic(self, index, return_seg=True) - shapes = None - - # TODO: Mixup not support segment for now - # MixUp augmentation - if random.random() < hyp["mixup"]: - img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.num_imgs - 1))) - - else: - # Load image - img, (h0, w0), (h, w) = load_image(self, index) - - # Letterbox - shape = ( - self.batch_shapes[self.batch_index[index]] if self.rect else self.img_size) # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - labels = self.labels[index].copy() - # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy - segments = self.segments[index].copy() - # TODO - if len(segments): - for i_s in range(len(segments)): - segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) - if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) - - if self.augment: - img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], - translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], - return_seg=True, ) - - nl = len(labels) # number of labels - if nl: - labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) - if self.overlap: - masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, - downsample_ratio=self.downsample_ratio) - masks = masks[None] # (640, 640) -> (1, 640, 640) - labels = labels[sorted_idx] - else: - masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) - - masks = (torch.from_numpy(masks) if len(masks) else - torch.zeros(1 if self.overlap else nl, - img.shape[0] // self.downsample_ratio, - img.shape[1] // self.downsample_ratio)) - # TODO: albumentations support - if self.augment: - # 
Albumentations - # there are some augmentation that won't change boxes and masks, - # so just be it for now. - img, labels = self.albumentations(img, labels) - nl = len(labels) # update after albumentations - - # HSV color-space - augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) - - # Flip up-down - if random.random() < hyp["flipud"]: - img = np.flipud(img) - if nl: - labels[:, 2] = 1 - labels[:, 2] - masks = torch.flip(masks, dims=[1]) - - # Flip left-right - if random.random() < hyp["fliplr"]: - img = np.fliplr(img) - if nl: - labels[:, 1] = 1 - labels[:, 1] - masks = torch.flip(masks, dims=[2]) - - # Cutouts # labels = cutout(img, labels, p=0.5) - - labels_out = torch.zeros((nl, 6)) - if nl: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img = np.ascontiguousarray(img) - - return (torch.from_numpy(img), labels_out, self.img_files[index], shapes, masks) - - @staticmethod - def collate_fn(batch): - img, label, path, shapes, masks = zip(*batch) # transposed - batched_masks = torch.cat(masks, 0) - # print(batched_masks.shape) - # print('batched_masks:', (batched_masks > 0).sum()) - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() - return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks - - -# Ancillary functions -------------------------------------------------------------------------------------------------- -def load_image(self, i): - # loads 1 image from dataset index 'i', returns im, original hw, resized hw - im = self.imgs[i] - if im is None: # not cached in ram - npy = self.img_npy[i] - if npy and npy.exists(): # load npy - im = np.load(npy) - else: # read image - path = self.img_files[i] - im = cv2.imread(path) # BGR - assert im is not None, "Image Not Found " + path - h0, w0 = im.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # ratio - if r != 1: # if sizes are not equal - im = cv2.resize(im, (int(w0 * r), int(h0 * r)), - interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR, ) - return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized - else: - return (self.imgs[i], self.img_hw0[i], self.img_hw[i],) # im, hw_original, hw_resized - - -def load_mosaic(self, index, return_seg=False): - # YOLOv5 4-mosaic loader. 
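A note on the label layout these collate functions rely on: every `__getitem__` returns an `(nl, 6)` tensor whose first column is left at zero, and `collate_fn` fills it with the image's position in the batch so that `build_targets()` can tell instances apart after concatenation. A minimal standalone sketch with made-up shapes:

    import torch

    # columns: [batch_index, cls, x, y, w, h]; batch_index is 0 for every image at first
    labels = [torch.zeros(2, 6), torch.zeros(1, 6)]  # image 0 has 2 labels, image 1 has 1
    for i, l in enumerate(labels):
        l[:, 0] = i                                  # tag each label with its image index
    targets = torch.cat(labels, 0)                   # (3, 6); rows 0-1 -> image 0, row 2 -> image 1
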
Loads 1 image + 3 random images into a 4-image mosaic - labels4, segments4 = [], [] - s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - - # 3 additional image indices - indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = load_image(self, index) - - # place img in img4 - if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) - elif i == 1: # top right - x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc - x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h - elif i == 2: # bottom left - x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) - x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) - elif i == 3: # bottom right - x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) - x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] - padw = x1a - x1b - padh = y1a - y1b - - labels, segments = self.labels[index].copy(), self.segments[index].copy() - - if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format - segments = [xyn2xy(x, w, h, padw, padh) for x in segments] - labels4.append(labels) - segments4.extend(segments) - - # Concat/clip labels - labels4 = np.concatenate(labels4, 0) - for x in (labels4[:, 1:], *segments4): - np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() - # img4, labels4 = replicate(img4, labels4) # replicate - - # Augment - img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) - results = random_perspective(img4, labels4, segments4, degrees=self.hyp["degrees"], translate=self.hyp["translate"], - scale=self.hyp["scale"], shear=self.hyp["shear"], perspective=self.hyp["perspective"], - border=self.mosaic_border, area_thr=self.area_thr, return_seg=return_seg, ) # border to remove - # return (img4, labels4, segments4) if return_seg else (img4, labels4) - return results - - -def load_mosaic9(self, index): - # YOLOv5 9-mosaic loader. 
Loads 1 image + 8 random images into a 9-image mosaic - labels9, segments9 = [], [] - s = self.img_size - indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices - random.shuffle(indices) - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = load_image(self, index) - - # place img in img9 - if i == 0: # center - img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - h0, w0 = h, w - c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates - elif i == 1: # top - c = s, s - h, s + w, s - elif i == 2: # top right - c = s + wp, s - h, s + wp + w, s - elif i == 3: # right - c = s + w0, s, s + w0 + w, s + h - elif i == 4: # bottom right - c = s + w0, s + hp, s + w0 + w, s + hp + h - elif i == 5: # bottom - c = s + w0 - w, s + h0, s + w0, s + h0 + h - elif i == 6: # bottom left - c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h - elif i == 7: # left - c = s - w, s + h0 - h, s, s + h0 - elif i == 8: # top left - c = s - w, s + h0 - hp - h, s, s + h0 - hp - - padx, pady = c[:2] - x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords - - # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() - if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format - segments = [xyn2xy(x, w, h, padx, pady) for x in segments] - labels9.append(labels) - segments9.extend(segments) - - # Image - img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] - hp, wp = h, w # height, width previous - - # Offset - yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y - img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s] - - # Concat/clip labels - labels9 = np.concatenate(labels9, 0) - labels9[:, [1, 3]] -= xc - labels9[:, [2, 4]] -= yc - c = np.array([xc, yc]) # centers - segments9 = [x - c for x in segments9] - - for x in (labels9[:, 1:], *segments9): - np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() - # img9, labels9 = replicate(img9, labels9) # replicate - - # Augment - img9, labels9 = random_perspective(img9, labels9, segments9, degrees=self.hyp["degrees"], - translate=self.hyp["translate"], scale=self.hyp["scale"], shear=self.hyp["shear"], - perspective=self.hyp["perspective"], border=self.mosaic_border, ) # border to remove - - return img9, labels9 - - -def dataset_stats(path="coco128.yaml", autodownload=False, verbose=False, profile=False, hub=False): - """Return dataset statistics dictionary with images and instances counts per split per class - To run in parent directory: export PYTHONPATH="$PWD/yolov5" - Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) - Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') - Arguments - path: Path to data.yaml or data.zip (with data.yaml inside data.zip) - autodownload: Attempt to download dataset if not found locally - verbose: Print stats dictionary - """ - - def round_labels(labels): - # Update labels to integer class and 6 decimal place floats - return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels] - - def unzip(path): - # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/' - if str(path).endswith(".zip"): # path is data.zip - assert Path(path).is_file(), f"Error unzipping {path}, file not found" - ZipFile(path).extractall(path=path.parent) # unzip - dir = path.with_suffix("") # dataset directory == zip name - 
return (True, str(dir), next(dir.rglob("*.yaml")),) # zipped, data_dir, yaml_path - else: # path is data.yaml - return False, None, path - - def hub_ops(f, max_dim=1920): - # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing - f_new = im_dir / Path(f).name # dataset-hub image filename - try: # use PIL - im = Image.open(f) - r = max_dim / max(im.height, im.width) # ratio - if r < 1.0: # image too large - im = im.resize((int(im.width * r), int(im.height * r))) - im.save(f_new, quality=75) # save - except Exception as e: # use OpenCV - print(f"WARNING: HUB ops PIL failure {f}: {e}") - im = cv2.imread(f) - im_height, im_width = im.shape[:2] - r = max_dim / max(im_height, im_width) # ratio - if r < 1.0: # image too large - im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_LINEAR, ) - cv2.imwrite(str(f_new), im) - - zipped, data_dir, yaml_path = unzip(Path(path)) - with open(check_yaml(yaml_path), errors="ignore") as f: - data = yaml.safe_load(f) # data dict - if zipped: - data["path"] = data_dir # TODO: should this be dir.resolve()? - check_dataset(data, autodownload) # download dataset if missing - hub_dir = Path(data["path"] + ("-hub" if hub else "")) - stats = {"nc": data["nc"], "names": data["names"]} # statistics dictionary - for split in "train", "val", "test": - if data.get(split) is None: - stats[split] = None # i.e. no test set - continue - x = [] - dataset = LoadImagesAndLabels(data[split]) # load dataset - for label in tqdm(dataset.labels, total=dataset.num_imgs, desc="Statistics"): - x.append(np.bincount(label[:, 0].astype(int), minlength=data["nc"])) - x = np.array(x) # shape(128x80) - stats[split] = {"instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()}, - "image_stats": {"total": dataset.num_imgs, "unlabelled": int(np.all(x == 0, 1).sum()), - "per_class": (x > 0).sum(0).tolist(), }, - "labels": [{str(Path(k).name): round_labels(v.tolist())} for k, v in - zip(dataset.img_files, dataset.labels)], } - - if hub: - im_dir = hub_dir / "images" - im_dir.mkdir(parents=True, exist_ok=True) - for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.num_imgs, - desc="HUB Ops", ): - pass - - # Profile - stats_path = hub_dir / "stats.json" - if profile: - for _ in range(1): - file = stats_path.with_suffix(".npy") - t1 = time.time() - np.save(file, stats) - t2 = time.time() - x = np.load(file, allow_pickle=True) - print(f"stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") - - file = stats_path.with_suffix(".json") - t1 = time.time() - with open(file, "w") as f: - json.dump(stats, f) # save stats *.json - t2 = time.time() - with open(file, "r") as f: - x = json.load(f) # load hyps dict - print(f"stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write") - - # Save, print and return - if hub: - print(f"Saving {stats_path.resolve()}...") - with open(stats_path, "w") as f: - json.dump(stats, f) # save stats.json - if verbose: - print(json.dumps(stats, indent=2, sort_keys=False)) - return stats - - -# REFACTOR IN NEW FILE -import os -import glob -import shutil -import hashlib -import torch -import cv2 -import random -from pathlib import Path -from PIL import ImageOps, ExifTags -from utils.segment import segments2boxes -from utils.general import xywh2xyxy - -# Parameters -HELP_URL = "https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data" -IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo", ] # acceptable 
image suffixes -VID_FORMATS = ["mov", "avi", "mp4", "mpg", "mpeg", "m4v", "wmv", "mkv", "vdo", "flv", ] # acceptable video suffixes -NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads - -# Get orientation exif tag -for orientation in ExifTags.TAGS.keys(): - if ExifTags.TAGS[orientation] == "Orientation": - break - - -def get_hash(paths): - # Returns a single hash value of a list of paths (files or dirs) - size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes - h = hashlib.md5(str(size).encode()) # hash sizes - h.update("".join(paths).encode()) # hash paths - return h.hexdigest() # return hash - - -def exif_size(img): - # Returns exif-corrected PIL size - s = img.size # (width, height) - try: - rotation = dict(img._getexif().items())[orientation] - if rotation == 6: # rotation 270 - s = (s[1], s[0]) - elif rotation == 8: # rotation 90 - s = (s[1], s[0]) - except: - pass - - return s - - -def exif_transpose(image): - """ - Transpose a PIL image accordingly if it has an EXIF Orientation tag. - Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() - - :param image: The image to transpose. - :return: An image. - """ - exif = image.getexif() - orientation = exif.get(0x0112, 1) # default 1 - if orientation > 1: - method = {2: Image.FLIP_LEFT_RIGHT, 3: Image.ROTATE_180, 4: Image.FLIP_TOP_BOTTOM, 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, 7: Image.TRANSVERSE, 8: Image.ROTATE_90, }.get(orientation) - if method is not None: - image = image.transpose(method) - del exif[0x0112] - image.info["exif"] = exif.tobytes() - return image - -def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): - """ - Args: - img_size (tuple): The image size. - polygons (np.ndarray): [N, M], N is the number of polygons, - M is the number of points(Be divided by 2). - """ - mask = np.zeros(img_size, dtype=np.uint8) - polygons = np.asarray(polygons) - polygons = polygons.astype(np.int32) - shape = polygons.shape - polygons = polygons.reshape(shape[0], -1, 2) - cv2.fillPoly(mask, polygons, color=color) - nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) - # NOTE: fillPoly then resize is trying the keep the same way - # of loss calculation when mask-ratio=1. - mask = cv2.resize(mask, (nw, nh)) - return mask - - -def polygons2masks(img_size, polygons, color, downsample_ratio=1): - """ - Args: - img_size (tuple): The image size. - polygons (list[np.ndarray]): each polygon is [N, M], - N is the number of polygons, - M is the number of points(Be divided by 2). 
- """ - masks = [] - for si in range(len(polygons)): - mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, - downsample_ratio) - masks.append(mask) - return np.array(masks) - - -def polygons2masks_overlap(img_size, segments, downsample_ratio=1): - """Return a (640, 640) overlap mask.""" - masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), - dtype=np.uint8) - areas = [] - ms = [] - for si in range(len(segments)): - mask = polygon2mask( - img_size, - [segments[si].reshape(-1)], - downsample_ratio=downsample_ratio, - color=1, - ) - ms.append(mask) - areas.append(mask.sum()) - areas = np.asarray(areas) - index = np.argsort(-areas) - ms = np.array(ms)[index] - for i in range(len(segments)): - mask = ms[i] * (i + 1) - masks = masks + mask - masks = np.clip(masks, a_min=0, a_max=i + 1) - return masks, index - - -def img2label_paths(img_paths): - # Define label paths as a function of image paths - sa, sb = (os.sep + "images" + os.sep, os.sep + "labels" + os.sep,) # /images/, /labels/ substrings - return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths] - - -def create_folder(path="./new"): - # Create folder - if os.path.exists(path): - shutil.rmtree(path) # delete output folder - os.makedirs(path) # make new output folder - - -def flatten_recursive(path="../datasets/coco128"): - # Flatten a recursive directory by bringing all files to top level - new_path = Path(path + "_flat") - create_folder(new_path) - for file in tqdm(glob.glob(str(Path(path)) + "/**/*.*", recursive=True)): - shutil.copyfile(file, new_path / Path(file).name) - - -def extract_boxes(path="../datasets/coco128", ): # from utils.datasets import *; extract_boxes() - # Convert detection dataset into classification dataset, with one directory per class - path = Path(path) # images dir - shutil.rmtree(path / "classifier") if (path / "classifier").is_dir() else None # remove existing - files = list(path.rglob("*.*")) - n = len(files) # number of files - for im_file in tqdm(files, total=n): - if im_file.suffix[1:] in IMG_FORMATS: - # image - im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB - h, w = im.shape[:2] - - # labels - lb_file = Path(img2label_paths([str(im_file)])[0]) - if Path(lb_file).exists(): - with open(lb_file, "r") as f: - lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32, ) # labels - - for j, x in enumerate(lb): - c = int(x[0]) # class - f = ((path / "classifier") / f"{c}" / f"{path.stem}_{im_file.stem}_{j}.jpg") # new filename - if not f.parent.is_dir(): - f.parent.mkdir(parents=True) - - b = x[1:] * [w, h, w, h] # box - # b[2:] = b[2:].max() # rectangle to square - b[2:] = b[2:] * 1.2 + 3 # pad - b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) - - b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image - b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite(str(f), im[b[1]: b[3], b[0]: b[2]]), f"box failure in {f}" - - -def autosplit(path="../datasets/coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False): - """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files - Usage: from utils.datasets import *; autosplit() - Arguments - path: Path to images directory - weights: Train, val, test weights (list, tuple) - annotated_only: Only use images with an annotated txt file - """ - path = Path(path) # images dir - files = sorted([x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS]) # image files only - n = len(files) # number of files - 
random.seed(0) # for reproducibility - indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split - - txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt", ] # 3 txt files - [(path.parent / x).unlink(missing_ok=True) for x in txt] # remove existing - - print(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only) - for i, img in tqdm(zip(indices, files), total=n): - if (not annotated_only or Path(img2label_paths([str(img)])[0]).exists()): # check label - with open(path.parent / txt[i], "a") as f: - f.write("./" + img.relative_to(path.parent).as_posix() + "\n") # add image to txt file - - -def verify_image_label(args): - # Verify one image-label pair - im_file, lb_file, prefix = args - nm, nf, ne, nc, msg, segments = (0, 0, 0, 0, "", [],) # number (missing, found, empty, corrupt), message, segments - try: - # verify images - im = Image.open(im_file) - im.verify() # PIL verify - shape = exif_size(im) # image size - assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels" - assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}" - if im.format.lower() in ("jpg", "jpeg"): - with open(im_file, "rb") as f: - f.seek(-2, 2) - if f.read() != b"\xff\xd9": # corrupt JPEG - ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100) - msg = f"{prefix}WARNING: {im_file}: corrupt JPEG restored and saved" - - # verify labels - if os.path.isfile(lb_file): - nf = 1 # label found - with open(lb_file, "r") as f: - l = [x.split() for x in f.read().strip().splitlines() if len(x)] - if any([len(x) > 6 for x in l]): # is segment - classes = np.array([x[0] for x in l], dtype=np.float32) - segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) - l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) - l = np.array(l, dtype=np.float32) - nl = len(l) - if nl: - assert (l.shape[1] == 5), f"labels require 5 columns, {l.shape[1]} columns detected" - assert (l >= 0).all(), f"negative label values {l[l < 0]}" - assert (l[:, 1:] <= 1).all(), f"non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}" - l, idx = np.unique(l, axis=0, return_index=True) # remove duplicate rows - # NOTE: `np.unique` will change the order of `l`, so adjust the segments order too. - segments = [segments[i] for i in idx] if len(segments) > 0 else segments - if len(l) < nl: - msg = f"{prefix}WARNING: {im_file}: {nl - len(l)} duplicate labels removed" - else: - ne = 1 # label empty - l = np.zeros((0, 5), dtype=np.float32) - else: - nm = 1 # label missing - l = np.zeros((0, 5), dtype=np.float32) - return im_file, l, shape, segments, nm, nf, ne, nc, msg - except Exception as e: - nc = 1 - msg = f"{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}" - return [None, None, None, None, nm, nf, ne, nc, msg] - - -from torch.utils.data import DataLoader as torchDataLoader - - -class DataLoader(torchDataLoader): - """ - Lightnet dataloader that enables on the fly resizing of the images. - See :class:`torch.utils.data.DataLoader` for more information on the arguments. 
- Check more on the following website: - https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def close_augment(self): - self.batch_sampler.augment = False - - -class InfiniteDataLoader(torchDataLoader): - """Dataloader that reuses workers - - Uses same syntax as vanilla DataLoader - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler)) - self.iterator = super().__iter__() - - def __len__(self): - return len(self.batch_sampler.sampler) - - def __iter__(self): - for i in range(len(self)): - yield next(self.iterator) diff --git a/detect_instseg.py b/segment/detect.py similarity index 98% rename from detect_instseg.py rename to segment/detect.py index a703f75d486b..d8e6150873f6 100644 --- a/detect_instseg.py +++ b/segment/detect.py @@ -169,7 +169,7 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # plot masks - mcolors = [colors(int(cls)) for cls in det[:, 5]] + mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] # NOTE: this way to draw masks is faster, # but the image might get blurred, # from https://github.com/dbolya/yolact @@ -180,7 +180,7 @@ def run( annotator.im = img_masks # Write results - for *xyxy, conf, cls in reversed(det): + for i, (*xyxy, conf, cls) in enumerate(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format @@ -190,7 +190,7 @@ def run( if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(c, True)) + annotator.box_label(xyxy, label, color=colors(i, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) diff --git a/evaluator.py b/segment/evaluator.py similarity index 98% rename from evaluator.py rename to segment/evaluator.py index 1db1ff28c7d0..acf8f94e42cf 100644 --- a/evaluator.py +++ b/segment/evaluator.py @@ -15,18 +15,17 @@ import numpy as np import torch import torch.nn.functional as F -from PIL import Image import pycocotools.mask as mask_util from tqdm import tqdm from models.experimental import attempt_load -from seg_dataloaders import create_dataloader +from utils.segment.dataloaders import create_dataloader from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) from utils.general import (check_dataset, check_img_size, check_suffix) from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) from utils.plots import output_to_target, plot_images_boxes_and_masks -from utils.seg_metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.segment import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) +from utils.segment.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix +from utils.segment.general import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) from utils.torch_utils import select_device, time_sync, de_parallel @@ -493,9 +492,9 @@ def plot_images(self, i, img, targets, masks, out, paths): #Thread(target=plot_images_boxes_and_masks, # args=(img, 
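`InfiniteDataLoader` above wraps its `batch_sampler` in `_RepeatSampler`, which is not shown in this hunk. For reference, in the YOLOv5 codebase it is essentially the following: a sampler that re-yields its wrapped sampler forever, so DataLoader worker processes are never torn down between epochs.

    class _RepeatSampler:
        # Sampler that repeats forever so DataLoader workers persist across epochs
        def __init__(self, sampler):
            self.sampler = sampler

        def __iter__(self):
            while True:
                yield from iter(self.sampler)
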
output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), # daemon=True, ).start() - import wandb - if wandb.run: - wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) + # import wandb + # if wandb.run: + # wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) def nms(self, **kwargs): return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) diff --git a/train_instseg.py b/segment/train.py similarity index 98% rename from train_instseg.py rename to segment/train.py index 5f98ff839ba6..1bee611e4ec1 100644 --- a/train_instseg.py +++ b/segment/train.py @@ -33,7 +33,7 @@ from tqdm import tqdm FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] # YOLOv5 root directory +ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative @@ -44,15 +44,15 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from seg_dataloaders import create_dataloader +from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import Loggers, NewLoggersMask +from utils.loggers import NewLoggersMask from utils.loggers.wandb.wandb_utils import check_wandb_resume -from utils.seg_loss import ComputeLoss +from utils.segment.loss import ComputeLoss #from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -251,7 +251,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '), - mask_head=True, shuffle=True, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, @@ -274,7 +273,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio rank=-1, workers=workers * 2, pad=0.5, - mask_head=True, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, prefix=colorstr('val: '))[0] @@ -344,7 +342,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: - train_loader.batch_sampler.sampler.set_epoch(epoch) + train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) if RANK in {-1, 0}: diff --git a/val_instseg.py b/segment/val.py similarity index 100% rename from val_instseg.py rename to segment/val.py diff --git a/segment/val_new.py b/segment/val_new.py new file mode 100644 index 000000000000..033dec732bd8 --- /dev/null +++ b/segment/val_new.py @@ -0,0 +1,459 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 model accuracy on a custom dataset + +Usage: + $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640 + +Usage - formats: + $ python path/to/val.py --weights yolov5s.pt # PyTorch + 
                                 yolov5s.torchscript        # TorchScript
+                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                 yolov5s.xml                # OpenVINO
+                                 yolov5s.engine             # TensorRT
+                                 yolov5s.mlmodel            # CoreML (macOS-only)
+                                 yolov5s_saved_model        # TensorFlow SavedModel
+                                 yolov5s.pb                 # TensorFlow GraphDef
+                                 yolov5s.tflite             # TensorFlow Lite
+                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+"""
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory (this file lives in segment/)
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+import torch.nn.functional as F
+import pycocotools.mask as mask_util
+from models.common import DetectMultiBackend
+from utils.callbacks import Callbacks
+from utils.segment.dataloaders import create_dataloader
+from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml,
+                           coco80_to_coco91_class, colorstr, emojis, increment_path, print_args,
+                           scale_coords, xywh2xyxy, xyxy2xywh)
+from utils.segment.general import non_max_suppression_masks, process_mask_upsample, mask_iou, scale_masks
+from utils.metrics import ConfusionMatrix, ap_per_class, box_iou
+from utils.segment.metrics import ap_per_class_box_and_mask, Metrics
+from utils.plots import output_to_target, plot_images, plot_val_study
+from utils.torch_utils import select_device, time_sync, de_parallel
+
+
+def save_one_txt(predn, save_conf, shape, file):
+    # Save one txt result
+    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
+    for *xyxy, conf, cls in predn.tolist():
+        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
+        with open(file, 'a') as f:
+            f.write(('%g ' * len(line)).rstrip() % line + '\n')
+
+
+def save_one_json(predn, jdict, path, class_map, pred_masks):
+    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+    box = xyxy2xywh(predn[:, :4])  # xywh
+    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+
+    pred_masks = np.transpose(pred_masks, (2, 0, 1))
+    rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks]
+    for rle in rles:
+        rle["counts"] = rle["counts"].decode("utf-8")
+
+    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
+        pred_dict = {
+            'image_id': image_id,
+            'category_id': class_map[int(p[5])],
+            'bbox': [round(x, 3) for x in b],
+            'score': round(p[4], 5)}
+        pred_dict["segmentation"] = rles[i]
+        jdict.append(pred_dict)
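`save_one_json` above serializes each predicted mask to COCO run-length encoding. `mask_util.encode` expects a Fortran-ordered `uint8` array of shape `(H, W, n)` and returns one dict per mask whose `counts` field is `bytes`, hence the `decode("utf-8")` before JSON dumping. A standalone sketch with a dummy mask:

    import numpy as np
    import pycocotools.mask as mask_util

    mask = np.zeros((4, 4), dtype=np.uint8)
    mask[1:3, 1:3] = 1                                    # a 2x2 square of foreground
    rle = mask_util.encode(np.asarray(mask[:, :, None], order="F"))[0]
    rle["counts"] = rle["counts"].decode("utf-8")         # now JSON-serializable
    # rle -> {'size': [4, 4], 'counts': '...'} (counts string depends on the mask)
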
+
+
+def process_batch(detections, labels, iouv):
+    """
+    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+    Arguments:
+        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (Array[M, 5]), class, x1, y1, x2, y2
+    Returns:
+        correct (Array[N, 10]), for 10 IoU levels
+    """
+    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+    iou = box_iou(labels[:, 1:], detections[:, :4])
+    correct_class = labels[:, 0:1] == detections[:, 5]
+    for i in range(len(iouv)):
+        x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
+        if x[0].shape[0]:
+            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detect, iou]
+            if x[0].shape[0] > 1:
+                matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+                # matches = matches[matches[:, 2].argsort()[::-1]]
+                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+            correct[matches[:, 1].astype(int), i] = True
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+
+
+def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap):
+    correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
+    # decode overlap-encoded masks (1, 640, 640) -> per-instance masks (n, 640, 640)
+    if overlap:
+        nl = len(labels)
+        index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
+        gt_masks = gt_masks.repeat(nl, 1, 1)
+        gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
+
+    if gt_masks.shape[1:] != pred_masks.shape[1:]:
+        gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear",
+                                 align_corners=False).squeeze(0)
+
+    iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
+    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5]))  # IoU above threshold and classes match
+    if x[0].shape[0]:
+        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detection, iou]
+        if x[0].shape[0] > 1:
+            matches = matches[matches[:, 2].argsort()[::-1]]
+            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+            # matches = matches[matches[:, 2].argsort()[::-1]]
+            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+        matches = torch.Tensor(matches).to(iouv.device)
+        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
+    return correct
+
+
+@torch.no_grad()
+def run(
+        data,
+        weights=None,  # model.pt path(s)
+        batch_size=32,  # batch size
+        imgsz=640,  # inference size (pixels)
+        conf_thres=0.001,  # confidence threshold
+        iou_thres=0.6,  # NMS IoU threshold
+        task='val',  # train, val, test, speed or study
+        device='',  # cuda device, i.e.
0 or 0,1,2,3 or cpu + workers=8, # max dataloader workers (per RANK in DDP mode) + single_cls=False, # treat as single-class dataset + augment=False, # augmented inference + verbose=False, # verbose output + save_txt=False, # save results to *.txt + save_hybrid=False, # save label+prediction hybrid results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_json=False, # save a COCO-JSON results file + project=ROOT / 'runs/val', # save to project/name + name='exp', # save to project/name + exist_ok=False, # existing project/name ok, do not increment + half=True, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + model=None, + dataloader=None, + save_dir=Path(''), + plots=True, + overlap=False, + mask_downsample_ratio=1, + callbacks=Callbacks(), + compute_loss=None, +): + # Initialize/load model and set device + training = model is not None + if training: # called by train.py + device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model + half &= device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() + else: # called directly + device = select_device(device, batch_size=batch_size) + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + imgsz = check_img_size(imgsz, s=stride) # check image size + half = model.fp16 # FP16 supported on limited backends with CUDA + if engine: + batch_size = model.batch_size + else: + device = model.device + if not (pt or jit): + batch_size = 1 # export.py models default to batch-size 1 + LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') + + # Data + data = check_dataset(data) # check + + # Configure + model.eval() + cuda = device.type != 'cpu' + is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset + nc = 1 if single_cls else int(data['nc']) # number of classes + iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 + niou = iouv.numel() + + # Dataloader + if not training: + if pt and not single_cls: # check --weights are trained on --data + ncm = model.model.nc + assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ + f'classes). Pass correct combination of --weights and --data that are trained together.' 
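For reference, the `iouv` vector configured above is the ten-threshold grid behind mAP@0.5:0.95; column `i` of each `correct` matrix produced by `process_batch` marks whether a prediction counts as a true positive at threshold `iouv[i]`:

    import torch

    iouv = torch.linspace(0.5, 0.95, 10)  # tensor([0.5000, 0.5500, ..., 0.9500])
    niou = iouv.numel()                   # 10; one TP column per IoU threshold
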
+        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))  # warmup
+        pad = 0.0 if task in ('speed', 'benchmark') else 0.5
+        rect = False if task == 'benchmark' else pt  # square inference for benchmarks
+        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
+        dataloader = create_dataloader(data[task],
+                                       imgsz,
+                                       batch_size,
+                                       stride,
+                                       single_cls,
+                                       pad=pad,
+                                       rect=rect,
+                                       workers=workers,
+                                       prefix=colorstr(f'{task}: '),
+                                       overlap_mask=overlap,
+                                       mask_downsample_ratio=mask_downsample_ratio)[0]
+
+    seen = 0
+    confusion_matrix = ConfusionMatrix(nc=nc)
+    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
+    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
+    s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}",
+                                  "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}")
+    dt = [0.0, 0.0, 0.0]
+    metrics = Metrics()
+    loss = torch.zeros(4, device=device)
+    jdict, stats = [], []
+    callbacks.run('on_val_start')
+    pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
+    for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar):
+        callbacks.run('on_val_batch_start')
+        t1 = time_sync()
+        if cuda:
+            im = im.to(device, non_blocking=True)
+            targets = targets.to(device)
+            masks = masks.to(device).float()
+        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        nb, _, height, width = im.shape  # batch size, channels, height, width
+        t2 = time_sync()
+        dt[0] += t2 - t1
+
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
+        dt[1] += time_sync() - t2
+
+        # Loss
+        if compute_loss:
+            loss += compute_loss([x.float() for x in train_out], targets, masks)[1]  # box, seg, obj, cls
+
+        # NMS
+        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # to pixels
+        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
+        t3 = time_sync()
+        out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls,
+                                        mask_dim=de_parallel(model).model[-1].mask_dim)
+        dt[2] += time_sync() - t3
+
+        # Metrics
+        for si, pred in enumerate(out):
+            labels = targets[targets[:, 0] == si, 1:]
+            midx = [si] if overlap else targets[:, 0] == si
+            gt_masks = masks[midx]
+            proto_out = train_out[1][si]
+            # combine mask prototypes with per-detection coefficients, then upsample to input size
+            pred_masks = process_mask_upsample(proto_out, pred[:, 6:], pred[:, :4],
+                                               shape=im[si].shape[1:]).permute(2, 0, 1).contiguous()
+
+            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, predictions
+            path, shape = Path(paths[si]), shapes[si][0]
+            correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
+            correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
+            seen += 1
+
+            if npr == 0:
+                if nl:
+                    stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0]))
+                continue
+
+            # Predictions
+            if single_cls:
+                pred[:, 5] = 0
+            predn = pred.clone()
+            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+
+            # Evaluate
+            if nl:
+                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
+                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
+                correct_bboxes = process_batch(predn, labelsn, iouv)
+                correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap)
+                if plots:
+                    confusion_matrix.process_batch(predn, labelsn)
+            stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0]))  # (correct_masks, correct_bboxes, conf, pcls, tcls)
+
+            # Save/log
+            if save_txt:
+                save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
+            if save_json:
+                pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
+                                         shape, shapes[si][1])
+                save_one_json(predn, jdict, path, class_map, pred_masks)  # append to COCO-JSON dictionary
+            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
+
+        # Plot images
+        if plots and batch_i < 3:
+            # TODO: plot with masks
+            plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names)  # labels
+            plot_images(im, output_to_target(out), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names)  # pred
+
+        callbacks.run('on_val_batch_end')
+
+    # Compute metrics
+    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]  # to numpy
+    if len(stats) and stats[0].any():
+        results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)
+        metrics.update(results)
+        # ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
+        # mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
+        nt = np.bincount(stats[4].astype(int), minlength=nc)  # number of targets per class
+    else:
+        nt = torch.zeros(1)
+
+    # Print results
+    pf = '%20s' + '%11i' * 2 + '%11.3g' * 8  # print format
+    LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results()))
+
+    # Print results per class
+    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
+        for i, c in enumerate(metrics.ap_class_index):
+            LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i)))
+
+    # Print speeds
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
+    if not training:
+        shape = (batch_size, 3, imgsz, imgsz)
+        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)
+
+    # Plots
+    if plots:
+        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
+        callbacks.run('on_val_end')
+
+    # Save JSON
+    if save_json and len(jdict):
+        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
+        anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
+        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
+        LOGGER.info(f'\nEvaluating pycocotools mAP...
saving {pred_json}...') + with open(pred_json, 'w') as f: + json.dump(jdict, f) + + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + check_requirements(['pycocotools']) + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, 'bbox') + if is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + eval.evaluate() + eval.accumulate() + eval.summarize() + # TODO: update these to metrics + map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + except Exception as e: + LOGGER.info(f'pycocotools unable to run: {e}') + + # Return results + model.float() # for training + if not training: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + return ((*metrics.mean_results(), *(loss.cpu() / len(dataloader)).tolist()), + metrics.get_maps(nc), t,) + # maps = np.zeros(nc) + map + # for i, c in enumerate(ap_class): + # maps[c] = ap[i] + # return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--batch-size', type=int, default=32, help='batch size') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--task', default='val', help='train, val, test, speed or study') + parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') + parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--verbose', action='store_true', help='report mAP by class') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') + parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') + opt = parser.parse_args() + opt.data = check_yaml(opt.data) # check YAML + opt.save_json |= opt.data.endswith('coco.yaml') + opt.save_txt |= opt.save_hybrid + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop')) + + if opt.task in ('train', 'val', 'test'): # run normally + if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 + LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')) + run(**vars(opt)) + + else: + weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] + opt.half = True # FP16 for fastest results + if opt.task == 'speed': # speed benchmarks + # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt... + opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False + for opt.weights in weights: + run(**vars(opt), plots=False) + + elif opt.task == 'study': # speed vs mAP benchmarks + # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt... 
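Each study run (see the sweep below) writes one `study_*.txt` per weights file, with one row per image size containing the returned metrics followed by the three per-image timings; `plot_val_study` reads these files back. The rows can also be inspected directly, as in this sketch (the filename is hypothetical, matching the pattern the loop generates):

    import numpy as np

    y = np.loadtxt('study_coco_yolov5s.txt', ndmin=2)   # hypothetical study file
    for imgsz, row in zip(range(256, 1536 + 128, 128), y):
        print(imgsz, row[-3:])                          # image size and per-image timings (ms)
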
+ for opt.weights in weights: + f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to + x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis + for opt.imgsz in x: # img-size + LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') + r, _, t = run(**vars(opt), plots=False) + y.append(r + t) # results and times + np.savetxt(f, y, fmt='%10.4g') # save + os.system('zip -r study.zip study_*.txt') + plot_val_study(x=x) # plot + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/utils/dataloaders.py b/utils/dataloaders.py old mode 100755 new mode 100644 index 9ccfe2545d75..260fb6a97da9 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -511,6 +511,7 @@ def __init__(self, self.im_files = [self.im_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] diff --git a/utils/general.py b/utils/general.py old mode 100755 new mode 100644 diff --git a/utils/metrics.py b/utils/metrics.py index cfdfbdb88b2c..605e692de04d 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -26,7 +26,7 @@ def smooth(y, f=0.05): return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16): +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""): """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments @@ -81,10 +81,10 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = dict(enumerate(names)) # to dict if plot: - plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) - plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') - plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') - plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') + plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names) + plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1') + plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision') + plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall') i = smooth(f1.mean(0), 0.1).argmax() # max F1 index p, r, f1 = p[:, i], r[:, i], f1[:, i] @@ -352,4 +352,4 @@ def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confi ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") fig.savefig(save_dir, dpi=250) - plt.close() \ No newline at end of file + plt.close() diff --git a/utils/seg_metrics.py b/utils/seg_metrics.py deleted file mode 100644 index 9c6133118dfa..000000000000 --- a/utils/seg_metrics.py +++ /dev/null @@ -1,361 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Model validation metrics -""" - -import math -import warnings -from pathlib import Path - -import matplotlib.pyplot as plt -import numpy as np -import torch -from easydict import EasyDict as edict - - -def fitness(x, masks=False): - # Model fitness as a weighted combination of metrics - if masks: - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) - w = [0.0, 0.0, 
0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) - - -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), prefix=""): - """Compute the average precision, given the recall and precision curves. - Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. - # Arguments - tp: True positives (nparray, nx1 or nx10). - conf: Objectness value from 0-1 (nparray). - pred_cls: Predicted object classes (nparray). - target_cls: True object classes (nparray). - plot: Plot precision-recall curve at mAP@0.5 - save_dir: Plot save directory. - prefix: prefix. - # Returns - The average precision as computed in py-faster-rcnn. - """ - - # Sort by objectness - i = np.argsort(-conf) - tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] - - # Find unique classes - unique_classes = np.unique(target_cls) - nc = unique_classes.shape[0] # number of classes, number of detections - - # Create Precision-Recall curve and compute AP for each class - px, py = np.linspace(0, 1, 1000), [] # for plotting - ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) - for ci, c in enumerate(unique_classes): - i = pred_cls == c - n_l = (target_cls == c).sum() # number of labels - n_p = i.sum() # number of predictions - - if n_p == 0 or n_l == 0: - continue - else: - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 - - # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + 1e-16) - names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data - names = {i: v for i, v in enumerate(names)} # to dict - if plot and save_dir is not None: - plot_pr_curve(px, py, ap, Path(save_dir) / f"{prefix}PR_curve.png", names) - plot_mc_curve(px, f1, Path(save_dir) / f"{prefix}F1_curve.png", names, ylabel="F1") - plot_mc_curve(px, p, Path(save_dir) / f"{prefix}P_curve.png", names, ylabel="Precision") - plot_mc_curve(px, r, Path(save_dir) / f"{prefix}R_curve.png", names, ylabel="Recall") - - i = f1.mean(0).argmax() # max F1 index - return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype("int32") - - -def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): - """ - Args: - tp_b: tp of boxes. - tp_m: tp of masks. - other arguments see `func: ap_per_class`. 
- """ - results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Box", ) - results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Mask", ) - - results = edict({ - "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], - "ap_class": results_boxes[4], }, - "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], - "ap_class": results_masks[4], }, }) - return results - - -def compute_ap(recall, precision): - """Compute the average precision, given the recall and precision curves - # Arguments - recall: The recall curve (list) - precision: The precision curve (list) - # Returns - Average precision, precision curve, recall curve - """ - - # Append sentinel values to beginning and end - mrec = np.concatenate(([0.0], recall, [1.0])) - mpre = np.concatenate(([1.0], precision, [0.0])) - - # Compute the precision envelope - mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) - - # Integrate area under curve - method = "interp" # methods: 'continuous', 'interp' - if method == "interp": - x = np.linspace(0, 1, 101) # 101-point interp (COCO) - ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate - else: # 'continuous' - i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve - - return ap, mpre, mrec - - -class ConfusionMatrix: - # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix - def __init__(self, nc, conf=0.25, iou_thres=0.45): - self.matrix = np.zeros((nc + 1, nc + 1)) - self.nc = nc # number of classes - self.conf = conf - self.iou_thres = iou_thres - - def process_batch(self, detections, labels): - """ - Return intersection-over-union (Jaccard index) of boxes. - Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
- Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - None, updates confusion matrix accordingly - """ - detections = detections[detections[:, 4] > self.conf] - gt_classes = labels[:, 0].int() - detection_classes = detections[:, 5].int() - iou = box_iou(labels[:, 1:], detections[:, :4]) - - x = torch.where(iou > self.iou_thres) - if x[0].shape[0]: - matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - else: - matches = np.zeros((0, 3)) - - n = matches.shape[0] > 0 - m0, m1, _ = matches.transpose().astype(np.int16) - for i, gc in enumerate(gt_classes): - j = m0 == i - if n and sum(j) == 1: - self.matrix[detection_classes[m1[j]], gc] += 1 # correct - else: - self.matrix[self.nc, gc] += 1 # background FP - - if n: - for i, dc in enumerate(detection_classes): - if not any(m1 == i): - self.matrix[dc, self.nc] += 1 # background FN - - def matrix(self): - return self.matrix - - def plot(self, normalize=True, save_dir="", names=()): - try: - import seaborn as sn - - array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-6) if normalize else 1) # normalize columns - array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) - - fig = plt.figure(figsize=(12, 9), tight_layout=True) - sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels - with warnings.catch_warnings(): - warnings.simplefilter("ignore") # suppress empty matrix RuntimeWarning: All-NaN slice encountered - sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap="Blues", fmt=".2f", square=True, - xticklabels=names + ["background FP"] if labels else "auto", - yticklabels=names + ["background FN"] if labels else "auto", ).set_facecolor((1, 1, 1)) - fig.axes[0].set_xlabel("True") - fig.axes[0].set_ylabel("Predicted") - fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) - plt.close() - except Exception as e: - print(f"WARNING: ConfusionMatrix plot failure: {e}") - - def print(self): - for i in range(self.nc + 1): - print(" ".join(map(str, self.matrix[i]))) - - -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 - box2 = box2.T - - # Get the coordinates of bounding boxes - if x1y1x2y2: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - else: # transform from xywh to xyxy - b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 - b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 - b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 - b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 - - # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) - - # Union Area - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps - union = w1 * h1 + w2 * h2 - inter + eps - - iou = inter / union - if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width - ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height - if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( - b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared - if DIoU: - return iou - rho2 / c2 # DIoU - elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) - with torch.no_grad(): - alpha = v / (v - iou + (1 + eps)) - return iou - (rho2 / c2 + v * alpha) # CIoU - else: # GIoU https://arxiv.org/pdf/1902.09630.pdf - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU - else: - return iou # IoU - - -def box_iou(box1, box2): - # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py - """ - Return intersection-over-union (Jaccard index) of boxes. - Both sets of boxes are expected to be in (x1, y1, x2, y2) format. - Arguments: - box1 (Tensor[N, 4]) - box2 (Tensor[M, 4]) - Returns: - iou (Tensor[N, M]): the NxM matrix containing the pairwise - IoU values for every element in boxes1 and boxes2 - """ - - def box_area(box): - # box = 4xn - return (box[2] - box[0]) * (box[3] - box[1]) - - area1 = box_area(box1.T) - area2 = box_area(box2.T) - - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = ((torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)) - return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) - - -def bbox_ioa(box1, box2, eps=1e-7): - """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 - box1: np.array of shape(4) - box2: np.array of shape(nx4) - returns: np.array of shape(n) - """ - - box2 = box2.transpose() - - # Get the coordinates of bounding boxes - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - - # Intersection area - inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * ( - np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) - - # box2 area - box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps - - # Intersection over box2 area - return inter_area / box2_area - - -def wh_iou(wh1, wh2): - # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 - wh1 = wh1[:, None] # [N,1,2] - wh2 = wh2[None] # [1,M,2] - inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) - - -# Plots ---------------------------------------------------------------------------------------------------------------- - - -def plot_pr_curve(px, py, ap, save_dir="pr_curve.png", names=()): - # Precision-recall curve - fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) - py = np.stack(py, axis=1) - - if 0 < len(names) < 21: # display per-class legend if < 21 classes - for i, y in enumerate(py.T): - ax.plot(px, y, linewidth=1, label=f"{names[i]} {ap[i, 0]:.3f}") # plot(recall, precision) - else: - ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) - - ax.plot(px, py.mean(1), linewidth=3, color="blue", label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), ) - ax.set_xlabel("Recall") - ax.set_ylabel("Precision") - ax.set_xlim(0, 1) - ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) - plt.close() - - -def plot_mc_curve(px, py, save_dir="mc_curve.png", names=(), xlabel="Confidence", ylabel="Metric"): - # Metric-confidence curve - fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) - - if 0 < len(names) < 21: # display per-class legend if < 21 classes - for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f"{names[i]}") # plot(confidence, metric) - else: - ax.plot(px, py.T, linewidth=1, color="grey") # plot(confidence, metric) - - y = py.mean(0) - ax.plot(px, y, linewidth=3, color="blue", label=f"all classes {y.max():.2f} at {px[y.argmax()]:.3f}", ) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - ax.set_xlim(0, 1) - ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") - fig.savefig(Path(save_dir), dpi=250) - plt.close() diff --git a/utils/segment/__init__.py b/utils/segment/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py new file mode 100644 index 000000000000..be788a81ea94 --- /dev/null +++ b/utils/segment/augmentations.py @@ -0,0 +1,114 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Image augmentation functions +""" + +import math +import random + +import cv2 +import numpy as np + +from ..general import segment2box, resample_segments +from ..augmentations import box_candidates + + +def random_perspective(im, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, + border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = 
math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) + T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + new_segments = [] + if n: + use_segments = any(x.any() for x in segments) + new = np.zeros((n, 4)) + if use_segments: # warp segments + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + new_segments.append(xy) + + else: # warp boxes + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) + + # clip + new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) + new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + targets = targets[i] + targets[:, 1:5] = new[i] + new_segments = np.array(new_segments)[i] + + return im, targets, new_segments + + diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py new file mode 100644 index 000000000000..0230bcee13d2 --- /dev/null +++ b/utils/segment/dataloaders.py @@ -0,0 +1,305 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Dataloaders +""" + +import numpy as np +import cv2 +import random +import os +import torch + +from torch.utils.data import DataLoader +from torch.utils.data import distributed + +from ..augmentations import augment_hsv, copy_paste, letterbox, mixup +from ..dataloaders import LoadImagesAndLabels, InfiniteDataLoader, seed_worker +from ..general import xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER +from ..torch_utils import torch_distributed_zero_first +from .augmentations import random_perspective + + +def create_dataloader(path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + quad=False, + prefix='', + shuffle=False, + mask_downsample_ratio=1, + overlap_mask=False): + if rect and shuffle: + LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + with torch_distributed_zero_first(rank): # init dataset 
*.cache only once if DDP + dataset = LoadImagesAndLabelsAndMasks( + path, + imgsz, + batch_size, + augment=augment, # augmentation + hyp=hyp, # hyperparameters + rect=rect, # rectangular batches + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix, + downsample_ratio=mask_downsample_ratio, + overlap=overlap_mask) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + generator = torch.Generator() + generator.manual_seed(0) + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, + worker_init_fn=seed_worker, + generator=generator), dataset + + +class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0, prefix="", + downsample_ratio=1, overlap=False, + ): + super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, + stride, pad, prefix) + self.downsample_ratio = downsample_ratio + self.overlap = overlap + + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + masks = [] + if mosaic: + # Load mosaic + img, labels, segments = self.load_mosaic(index) + shapes = None + + # TODO: MixUp does not support segments yet + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.num_imgs - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = self.load_image(index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy + segments = self.segments[index].copy() + if len(segments): + for i_s in range(len(segments)): + segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + + if self.augment: + img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], + translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"]) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) + if self.overlap: + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, + downsample_ratio=self.downsample_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + labels = 
labels[sorted_idx] + else: + masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) + + masks = (torch.from_numpy(masks) if len(masks) else + torch.zeros(1 if self.overlap else nl, + img.shape[0] // self.downsample_ratio, + img.shape[1] // self.downsample_ratio)) + # TODO: albumentations support + if self.augment: + # Albumentations + # some augmentations do not alter boxes or masks, + # so they can be applied as-is for now. + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + masks = torch.flip(masks, dims=[1]) + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + masks = torch.flip(masks, dims=[2]) + + # Cutouts # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) + + def load_mosaic(self, index): + # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + + # 3 additional image indices + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + labels, segments = self.labels[index].copy(), self.segments[index].copy() + + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) + img4, labels4, segments4 = random_perspective( + img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], 
+ scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border) # border to remove + return img4, labels4, segments4 + + @staticmethod + def collate_fn(batch): + img, label, path, shapes, masks = zip(*batch) # transposed + batched_masks = torch.cat(masks, 0) + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks + + +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points (must be divisible by 2). + """ + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) + # NOTE: fillPoly first, then resize, to keep the loss calculation + # consistent with the mask-ratio=1 case. + mask = cv2.resize(mask, (nw, nh)) + return mask + + +def polygons2masks(img_size, polygons, color, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (list[np.ndarray]): each polygon is [N, M], + N is the number of polygons, + M is the number of points (must be divisible by 2). + """ + masks = [] + for si in range(len(polygons)): + mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, + downsample_ratio) + masks.append(mask) + return np.array(masks) + + +def polygons2masks_overlap(img_size, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), + dtype=np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask( + img_size, + [segments[si].reshape(-1)], + downsample_ratio=downsample_ratio, + color=1, + ) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index diff --git a/utils/segment.py b/utils/segment/general.py similarity index 89% rename from utils/segment.py rename to utils/segment/general.py index d9773784eafa..70056a4bbb31 100644 --- a/utils/segment.py +++ b/utils/segment/general.py @@ -6,34 +6,8 @@ import torch.nn.functional as F import torchvision -from .general import xyxy2xywh, xywh2xyxy -from .seg_metrics import box_iou - - -def segment2box(segment, width=640, height=640): - # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) - x, y = segment.T # segment xy - inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) - x, y, = (x[inside], y[inside],) - return (np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4))) # xyxy - - -def segments2boxes(segments): - # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) 
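polygons2masks_overlap above packs every instance of an image into one uint8 map: instances are painted in descending-area order, and the add-then-clip loop leaves each overlapping pixel holding index + 1 of the smaller instance covering it. A minimal sketch of the inverse operation, decoding such a map back into per-instance binary masks with the same arange/where trick that process_batch_masks in segment/val.py uses further below; decode_overlap_mask is an illustrative helper name, not part of this patch:

import torch

def decode_overlap_mask(overlap, n):
    # overlap: (H, W) map where pixel value k in 1..n marks instance k - 1, 0 is background
    index = torch.arange(n, device=overlap.device).view(n, 1, 1) + 1
    masks = overlap.unsqueeze(0).repeat(n, 1, 1)  # (n, H, W)
    return torch.where(masks == index, 1.0, 0.0)  # one binary mask per instance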
to (cls, xywh) - boxes = [] - for s in segments: - x, y = s.T # segment xy - boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy - return xyxy2xywh(np.array(boxes)) # cls, xywh - - -def resample_segments(segments, n=1000): - # Up-sample an (n,2) segment - for i, s in enumerate(segments): - x = np.linspace(0, len(s) - 1, n) - xp = np.arange(len(s)) - segments[i] = (np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T) # segment xy - return segments +from ..general import xywh2xyxy +from .metrics import box_iou def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, diff --git a/utils/seg_loss.py b/utils/segment/loss.py similarity index 55% rename from utils/seg_loss.py rename to utils/segment/loss.py index e0618f831e63..47fed765f990 100644 --- a/utils/seg_loss.py +++ b/utils/segment/loss.py @@ -1,14 +1,33 @@ -# TODO: merge with loss.py.. Optimize speed - import torch import torch.nn as nn import torch.nn.functional as F -from utils.general import xywh2xyxy -from utils.loss import smooth_BCE, FocalLoss -from utils.segment import masks_iou, crop -from utils.torch_utils import is_parallel +from ..general import xywh2xyxy +from ..loss import smooth_BCE, FocalLoss +from ..torch_utils import is_parallel +from ..metrics import bbox_iou +from .general import masks_iou, crop +class MaskIOULoss(nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): + """ + Args: + pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) + gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) + mxyxy (torch.Tensor): ground truth of boxes, (n, 4) + """ + _, _, n = pred_mask.shape # same as gt_mask + pred_mask = pred_mask.sigmoid() + if mxyxy is not None: + pred_mask = crop(pred_mask, mxyxy) + gt_mask = crop(gt_mask, mxyxy) + pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) + gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) + iou = masks_iou(pred_mask, gt_mask) + return iou if return_iou else (1.0 - iou) class ComputeLoss: # Compute losses @@ -40,69 +59,9 @@ def __init__(self, model, autobalance=False, overlap=False): if hasattr(det, k): setattr(self, k, getattr(det, k)) - def __call__(self, p, targets, masks=None): # predictions, targets, model - if masks is not None: - return self.loss_segment(p, targets, masks) - return self.loss_detection(p, targets) - - def loss_detection(self, p, targets): - device = targets.device - lcls, lbox, lobj = ( - torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device),) - tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets - - # Losses - for i, pi in enumerate(p): # layer index, layer predictions - b, a, gj, gi = indices[i] # image, anchor, gridy, gridx - tobj = torch.zeros_like(pi[..., 0], device=device) # target obj - - n = b.shape[0] # number of targets - if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets - - # Regression - pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] - pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) - lbox += (1.0 - iou).mean() # iou loss - - # Objectness - score_iou = iou.detach().clamp(0).type(tobj.dtype) - if self.sort_obj_iou: - sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) - 
tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou # iou ratio - - # Classification - if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets - t[range(n), tcls[i]] = self.cp - lcls += self.BCEcls(ps[:, 5:], t) # BCE - - # Append targets to text file # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] - - obji = self.BCEobj(pi[..., 4], tobj) - lobj += obji * self.balance[i] # obj loss - if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() - - if self.autobalance: - self.balance = [x / self.balance[self.ssi] for x in self.balance] - lbox *= self.hyp["box"] - lobj *= self.hyp["obj"] - lcls *= self.hyp["cls"] - bs = tobj.shape[0] # batch size - - return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach() - - def loss_segment(self, preds, targets, masks): - """ - proto_out:[batch-size, mask_dim, mask_height, mask_width] - masks:[batch-size * num_objs, image_height, image_width] - each image has a different number of objects; pad the short ones when batching - """ + def __call__(self, preds, targets, masks): # predictions, targets, model p = preds[0] + # [batch-size, mask_dim, mask_height, mask_width] proto_out = preds[1] mask_h, mask_w = proto_out.shape[2:] proto_out = proto_out.permute(0, 2, 3, 1) @@ -111,7 +70,7 @@ def loss_segment(self, preds, targets, masks): lcls, lbox, lobj, lseg = ( torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device),) - tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets_for_masks(p, targets) # targets + tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx @@ -125,7 +84,7 @@ def loss_segment(self, preds, targets, masks): pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness @@ -142,8 +101,10 @@ def loss_segment(self, preds, targets, masks): lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression + # TODO: + # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), mode="bilinear", - align_corners=False, ).squeeze(0) + align_corners=False).squeeze(0) mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T @@ -196,7 +157,7 @@ def loss_segment(self, preds, targets, masks): return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): - """mask loss of single pic.""" + """Mask loss of a single image.""" # (80, 80, 32) @ (32, n) -> (80, 80, n) pred_mask = proto @ pred.tanh().T # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) @@ -207,61 +168,6 @@ def loss_segment(self, preds, targets, masks): return lseg.mean(), iou  # + lseg_iou.mean() def build_targets(self, p, targets): - # Build targets for compute_loss(), input targets(image,class,x,y,w,h) - na, nt = self.na, targets.shape[0] # number of anchors, targets - tcls, tbox, indices, anch = [], [], [], [] - gain = torch.ones(7, device=targets.device) # normalized to gridspace gain - ai = ( - 
torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices - - g = 0.5 # bias - off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device, ).float() * g) # offsets - - for i in range(self.nl): - anchors, shape = self.anchors[i], p[i].shape - gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain - - # Match targets to anchors - t = targets * gain - if nt: - # Matches - r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"] # compare - # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) - t = t[j] # filter - - # Offsets - gxy = t[:, 2:4] # grid xy - gxi = gain[[2, 3]] - gxy # inverse - j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T - l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T - j = torch.stack((torch.ones_like(j), j, k, l, m)) - t = t.repeat((5, 1, 1))[j] - offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] - else: - t = targets[0] - offsets = 0 - - # Define - b, c = t[:, :2].long().T # image, class - gxy = t[:, 2:4] # grid xy - gwh = t[:, 4:6] # grid wh - gij = (gxy - offsets).long() - gi, gj = gij.T # grid xy indices - - # Append - a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid - tbox.append(torch.cat((gxy - gij, gwh), 1)) # box - anch.append(anchors[a]) # anchors - tcls.append(c) # class - - return tcls, tbox, indices, anch - - def build_targets_for_masks(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] @@ -336,73 +242,3 @@ def build_targets_for_masks(self, p, targets): xywh.append(torch.cat((gxy, gwh), 1)) return tcls, tbox, indices, anch, tidxs, xywh - - -class MaskIOULoss(nn.Module): - def __init__(self) -> None: - super().__init__() - - def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): - """ - Args: - pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) - gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) - mxyxy (torch.Tensor): ground truth of boxes, (n, 4) - """ - _, _, n = pred_mask.shape # same as gt_mask - pred_mask = pred_mask.sigmoid() - if mxyxy is not None: - pred_mask = crop(pred_mask, mxyxy) - gt_mask = crop(gt_mask, mxyxy) - pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) - gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) - iou = masks_iou(pred_mask, gt_mask) - return iou if return_iou else (1.0 - iou) - - -import math - - -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): - # Returns the IoU of box1 to box2. 
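The offset logic in build_targets assigns each target not only to its own grid cell but also to the neighbouring cells whose border its centre lies within g = 0.5 of, roughly tripling the positive samples per target. A small hedged demo of the j/k/l/m selection with an invented target centre on an 80x80 grid:

import torch

g = 0.5  # bias
gxy = torch.tensor([[2.3, 4.7]])           # target centre in grid units
gxi = torch.tensor([[80.0, 80.0]]) - gxy   # same centre measured from the far edge
j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T   # close to the left / top cell border?
l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T   # close to the right / bottom cell border?
print(j.item(), k.item(), l.item(), m.item())  # True False False True
# x fraction 0.3 < 0.5 -> also assign the cell to the left; y fraction 0.7 -> the cell below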
box1 is 4, box2 is nx4 - box2 = box2.T - - # Get the coordinates of bounding boxes - if x1y1x2y2: # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - else: # transform from xywh to xyxy - b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 - b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 - b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 - b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 - - # Intersection area - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( - torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) - - # Union Area - w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps - w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps - union = w1 * h1 + w2 * h2 - inter + eps - - iou = inter / union - if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width - ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height - if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + ( - b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared - if DIoU: - return iou - rho2 / c2 # DIoU - elif (CIoU): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) - with torch.no_grad(): - alpha = v / (v - iou + (1 + eps)) - return iou - (rho2 / c2 + v * alpha) # CIoU - else: # GIoU https://arxiv.org/pdf/1902.09630.pdf - c_area = cw * ch + eps # convex area - return iou - (c_area - union) / c_area # GIoU - else: - return iou # IoU diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py new file mode 100644 index 000000000000..602623377402 --- /dev/null +++ b/utils/segment/metrics.py @@ -0,0 +1,149 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Model validation metrics +""" + +import numpy as np +from easydict import EasyDict as edict +from ..metrics import ap_per_class + + +def fitness(x, masks=False): + # Model fitness as a weighted combination of metrics + if masks: + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. + """ + results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Box")[2:] + results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, + prefix="Mask")[2:] + + results = edict({ + "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], + "ap_class": results_boxes[4]}, + "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], + "ap_class": results_masks[4]}}) + return results + +class Metric: + def __init__(self) -> None: + self.p = [] # (nc, ) + self.r = [] # (nc, ) + self.f1 = [] # (nc, ) + self.all_ap = [] # (nc, 10) + self.ap_class_index = [] # (nc, ) + + @property + def ap50(self): + """AP@0.5 of all classes. + Return: + (nc, ) or []. 
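fitness above extends the detection weighting to eight metrics, [P, R, mAP@0.5, mAP@0.5:0.95] for boxes followed by the same four for masks, putting 0.1/0.9 weight on the two mAPs of each task and zero weight on P and R. A quick worked example of the masks=True branch with invented metric values:

import numpy as np

# [P, R, mAP@.5, mAP@.5:.95] for boxes, then the same four for masks (illustrative numbers)
x = np.array([[0.70, 0.60, 0.55, 0.35, 0.65, 0.55, 0.50, 0.30]])
w = np.array([0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9])
fi = (x[:, :8] * w).sum(1)
print(fi)  # [0.69] = 0.1 * 0.55 + 0.9 * 0.35 + 0.1 * 0.50 + 0.9 * 0.30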
+ """ + return self.all_ap[:, 0] if len(self.all_ap) else [] + + @property + def ap(self): + """AP@0.5:0.95 + Return: + (nc, ) or []. + """ + return self.all_ap.mean(1) if len(self.all_ap) else [] + + @property + def mp(self): + """mean precision of all classes. + Return: + float. + """ + return self.p.mean() if len(self.p) else 0.0 + + @property + def mr(self): + """mean recall of all classes. + Return: + float. + """ + return self.r.mean() if len(self.r) else 0.0 + + @property + def map50(self): + """Mean AP@0.5 of all classes. + Return: + float. + """ + return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 + + @property + def map(self): + """Mean AP@0.5:0.95 of all classes. + Return: + float. + """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index From 1540351877733ddd413b64ef298a2eb9b4d14adc Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 18:50:52 +0800 Subject: [PATCH 052/247] update val.py for segment --- segment/detect.py | 7 +- segment/train.py | 72 +++--- segment/val.py | 498 ++++++++++++++++++++++++++++++++++---- segment/val_new.py | 459 ----------------------------------- utils/loggers/__init__.py | 259 +++++--------------- utils/plots.py | 317 ------------------------ utils/segment/general.py | 2 +- utils/segment/metrics.py | 13 +- utils/segment/plots.py | 353 +++++++++++++++++++++++++++ 9 files changed, 914 insertions(+), 1066 deletions(-) delete mode 100644 segment/val_new.py create mode 100644 utils/segment/plots.py diff --git a/segment/detect.py b/segment/detect.py index d8e6150873f6..c751e39a06b8 100644 --- a/segment/detect.py +++ b/segment/detect.py @@ -33,7 +33,7 @@ import torch.backends.cudnn as cudnn FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] # YOLOv5 root directory +ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative @@ -42,9 +42,10 @@ from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams from utils.general import 
(LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) -from utils.plots import Annotator, colors, save_one_box, plot_masks +from utils.plots import Annotator, colors, save_one_box +from utils.segment.plots import plot_masks from utils.torch_utils import select_device, time_sync -from utils.segment import non_max_suppression_masks, scale_masks, process_mask_upsample +from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample @torch.no_grad() diff --git a/segment/train.py b/segment/train.py index 1bee611e4ec1..3a06915eb061 100644 --- a/segment/train.py +++ b/segment/train.py @@ -22,6 +22,7 @@ from datetime import datetime from pathlib import Path +import val # for end-of-epoch mAP import numpy as np import torch import torch.distributed as dist @@ -38,7 +39,6 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import val # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors @@ -46,14 +46,14 @@ from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download -from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, fitness, +from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import NewLoggersMask +from utils.loggers import LoggersMask from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.segment.loss import ComputeLoss -#from utils.metrics import fitness +from utils.segment.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -66,7 +66,6 @@ from torch.optim import AdamW import yaml from datetime import datetime -from evaluator import Yolov5Evaluator def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary print(device) @@ -96,8 +95,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Loggers data_dict = None if RANK in {-1, 0}: - newloggers = NewLoggersMask - loggers = newloggers( + loggers = LoggersMask( save_dir=save_dir, opt=opt, logger=LOGGER ) # loggers instance @@ -157,16 +155,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") - evaluator = Yolov5Evaluator( - data = data, - single_cls=single_cls, - save_dir=save_dir, - mask=True, - verbose=False, - mask_downsample_ratio=mask_ratio, - plots=plots, - overlap=overlap - ) + g = [], [], [] # optimizer parameter groups bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() for v in model.modules(): @@ -407,7 +396,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mode="bilinear", align_corners=False, ).squeeze(0) - callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots, opt.sync_bn) + callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) if callbacks.stop_training: return @@ -423,21 +412,25 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP - results, maps, _ = evaluator.run_training( - model=ema.ema, - dataloader=val_loader, - compute_loss=compute_loss, - ) + results, maps, _ = val.run(data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=ema.ema, + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + plots=False, + callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) # Update best mAP - def fitness(x): - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr - callbacks.run('on_fit_epoch_end', log_vals, epoch) + callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Save model if (not nosave) or (final_epoch and not evolve): # if save @@ -478,15 +471,26 @@ def fitness(x): strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') - results, _, _ = evaluator.run_training( - model=attempt_load(f, device).half(), - dataloader=val_loader, - compute_loss=compute_loss, - ) # val best model with plots + results, _, _ = val.run( + data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=attempt_load(f, device).half(), + iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + save_json=is_coco, + verbose=True, + plots=plots, + callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=1, + overlap=overlap) # val best model with plots if is_coco: - callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch) + callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) - callbacks.run('on_train_end', plots, epoch, masks=True) + callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() return results diff --git a/segment/val.py b/segment/val.py index 20183b6d7118..a2a4eb526773 100644 --- a/segment/val.py +++ b/segment/val.py @@ -3,81 +3,491 @@ Validate a trained YOLOv5 model accuracy on a custom dataset Usage: - $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 + $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640 + +Usage - formats: + $ python path/to/val.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s.xml # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS-only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + 
yolov5s_edgetpu.tflite # TensorFlow Edge TPU """ import argparse -from evaluator import Yolov5Evaluator +import json +import os +import sys +from pathlib import Path + +import numpy as np +import torch +from tqdm import tqdm + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +import torch.nn.functional as F +import pycocotools.mask as mask_util +from models.common import DetectMultiBackend +from utils.callbacks import Callbacks +from utils.segment.dataloaders import create_dataloader +from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, + coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, + scale_coords, xywh2xyxy, xyxy2xywh) +from utils.segment.general import (non_max_suppression_masks, process_mask_upsample, mask_iou, + scale_masks, process_mask) +from utils.metrics import ConfusionMatrix, box_iou +from utils.segment.metrics import ap_per_class_box_and_mask, Metrics +from utils.segment.plots import plot_images_and_masks +from utils.plots import output_to_target, plot_val_study +from utils.torch_utils import select_device, time_sync, de_parallel + + +def save_one_txt(predn, save_conf, shape, file): + # Save one txt result + gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh + for *xyxy, conf, cls in predn.tolist(): + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(file, 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + +def save_one_json(predn, jdict, path, class_map, pred_masks): + # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + image_id = int(path.stem) if path.stem.isnumeric() else path.stem + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + + pred_masks = np.transpose(pred_masks, (2, 0, 1)) + rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): + pred_dict = { + 'image_id': image_id, + 'category_id': class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'score': round(p[4], 5)} + pred_dict["segmentation"] = rles[i] + jdict.append(pred_dict) + + +def process_batch(detections, labels, iouv): + """ + Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. 
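save_one_json above stores each predicted mask in COCO run-length encoding: pycocotools expects a Fortran-ordered uint8 array of shape (H, W, 1), and the bytes in the counts field must be decoded to str before json.dump can serialize the record. A small round-trip sketch under those assumptions:

import numpy as np
import pycocotools.mask as mask_util

mask = np.zeros((160, 160), dtype=np.uint8)
mask[40:80, 40:80] = 1  # one square instance
rle = mask_util.encode(np.asarray(mask[:, :, None], order="F"))[0]
assert (mask_util.decode(rle) == mask).all()   # RLE is lossless for binary masks
rle["counts"] = rle["counts"].decode("utf-8")  # bytes -> str so json.dump accepts it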
+ Arguments: + detections (Array[N, 6]), x1, y1, x2, y2, conf, class + labels (Array[M, 5]), class, x1, y1, x2, y2 + Returns: + correct (Array[N, 10]), for 10 IoU levels + """ + correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool) + iou = box_iou(labels[:, 1:], detections[:, :4]) + correct_class = labels[:, 0:1] == detections[:, 5] + for i in range(len(iouv)): + x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match + if x[0].shape[0]: + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + correct[matches[:, 1].astype(int), i] = True + return torch.tensor(correct, dtype=torch.bool, device=iouv.device) + + +def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): + correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) + # convert masks (1, 640, 640) -> (n, 640, 640) + if overlap: + nl = len(labels) + index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 + gt_masks = gt_masks.repeat(nl, 1, 1) + gt_masks = torch.where(gt_masks == index, 1.0, 0.0) + + if gt_masks.shape[1:] != pred_masks.shape[1:]: + gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear", + align_corners=False, ).squeeze(0) + + iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1), ) + x = torch.where( + (iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match + if x[0].shape[0]: + matches = ( + torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + matches = torch.Tensor(matches).to(iouv.device) + correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv + return correct + + +@torch.no_grad() +def run( + data, + weights=None, # model.pt path(s) + batch_size=32, # batch size + imgsz=640, # inference size (pixels) + conf_thres=0.001, # confidence threshold + iou_thres=0.6, # NMS IoU threshold + task='val', # train, val, test, speed or study + device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu + workers=8, # max dataloader workers (per RANK in DDP mode) + single_cls=False, # treat as single-class dataset + augment=False, # augmented inference + verbose=False, # verbose output + save_txt=False, # save results to *.txt + save_hybrid=False, # save label+prediction hybrid results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_json=False, # save a COCO-JSON results file + project=ROOT / 'runs/val', # save to project/name + name='exp', # save to project/name + exist_ok=False, # existing project/name ok, do not increment + half=True, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + model=None, + dataloader=None, + save_dir=Path(''), + plots=True, + overlap=False, + mask_downsample_ratio=1, + callbacks=Callbacks(), + compute_loss=None, +): + process = process_mask_upsample if plots else process_mask + # Initialize/load model and set device + training = model is not None + if training: # called by train.py + device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model + half &= device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() + else: # called directly + device = select_device(device, batch_size=batch_size) + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + imgsz = check_img_size(imgsz, s=stride) # check image size + half = model.fp16 # FP16 supported on limited backends with CUDA + if engine: + batch_size = model.batch_size + else: + device = model.device + if not (pt or jit): + batch_size = 1 # export.py models default to batch-size 1 + LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') + + # Data + data = check_dataset(data) # check + + # Configure + model.eval() + cuda = device.type != 'cpu' + is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset + nc = 1 if single_cls else int(data['nc']) # number of classes + iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 + niou = iouv.numel() + + # Dataloader + if not training: + if pt and not single_cls: # check --weights are trained on --data + ncm = model.model.nc + assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ + f'classes). Pass correct combination of --weights and --data that are trained together.' 
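process_batch above fills the N x 10 correct matrix by greedy matching: all (label, detection) pairs over the IoU threshold are sorted by IoU and then deduplicated on the detection index and on the label index, so each side is matched at most once. A hedged numeric walk-through of a single threshold with invented IoUs:

import numpy as np
import torch

iou = torch.tensor([[0.90, 0.60],   # label 0 vs detections 0 and 1
                    [0.70, 0.80]])  # label 1 vs detections 0 and 1
correct_class = torch.ones(2, 2, dtype=torch.bool)  # assume all classes match
x = torch.where((iou >= 0.5) & correct_class)
matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).numpy()
matches = matches[matches[:, 2].argsort()[::-1]]                   # best IoU first
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # each detection once
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # each label once
print(matches)  # [[0. 0. 0.9], [1. 1. 0.8]] -> label 0 <-> det 0, label 1 <-> det 1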
+ model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup + pad = 0.0 if task in ('speed', 'benchmark') else 0.5 + rect = False if task == 'benchmark' else pt # square inference for benchmarks + task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images + dataloader = create_dataloader(data[task], + imgsz, + batch_size, + stride, + single_cls, + pad=pad, + rect=rect, + workers=workers, + prefix=colorstr(f'{task}: '), + overlap_mask=overlap, + mask_downsample_ratio=mask_downsample_ratio)[0] + + seen = 0 + confusion_matrix = ConfusionMatrix(nc=nc) + names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} + class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) + s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", + "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}") + dt = [0.0, 0.0, 0.0] + metrics = Metrics() + loss = torch.zeros(4, device=device) + jdict, stats = [], [] + callbacks.run('on_val_start') + pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar + for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): + callbacks.run('on_val_batch_start') + t1 = time_sync() + if cuda: + im = im.to(device, non_blocking=True) + targets = targets.to(device) + masks = masks.to(device).float() + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + nb, _, height, width = im.shape # batch size, channels, height, width + t2 = time_sync() + dt[0] += t2 - t1 + + # Inference + out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs + dt[1] += time_sync() - t2 + + # Loss + if compute_loss: + loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls -from utils.general import ( - set_logging, - print_args, - check_yaml, - check_requirements, -) + # NMS + targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels + lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling + t3 = time_sync() + out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + mask_dim=de_parallel(model).model[-1].mask_dim) + dt[2] += time_sync() - t3 + + # keep pred masks for plotting + plot_masks = [] + # Metrics + for si, pred in enumerate(out): + labels = targets[targets[:, 0] == si, 1:] + midx = [si] if overlap else targets[:, 0] == si + gt_masks = masks[midx] + proto_out = train_out[1][si] + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() + if plots and batch_i < 3: + plot_masks.append(pred_masks[:15].cpu()) + + nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions + path, shape = Path(paths[si]), shapes[si][0] + correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init + correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init + seen += 1 + + if npr == 0: + if nl: + stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) + continue + + # Predictions + if single_cls: + pred[:, 5] = 0 + predn = pred.clone() + scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred + + # Evaluate + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space 
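In the metrics loop below, each detection row carries mask coefficients after its box/conf/class columns, and process/process_mask turns them into per-instance masks by a single matrix product with the prototype tensor. A rough sketch of the core operation, mirroring the proto @ coef product that single_mask_loss in utils/segment/loss.py uses (HWC prototypes, tanh-squashed coefficients); the actual process_mask in utils/segment/general.py additionally crops each mask to its box and optionally upsamples:

import torch

h, w, c = 160, 160, 32                     # prototype resolution and channel count
proto = torch.randn(h, w, c)               # prototypes from the segmentation head
coef = torch.randn(5, c)                   # one 32-vector per detection
masks = (proto @ coef.tanh().T).sigmoid()  # (160, 160, 5): one channel per instance
binary = (masks > 0.5).permute(2, 0, 1)    # (5, 160, 160) boolean instance masks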
labels + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + correct_bboxes = process_batch(predn, labelsn, iouv) + correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) + if plots: + confusion_matrix.process_batch(predn, labelsn) + stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct_masks, correct_bboxes, conf, pcls, tcls) + + # Save/log + if save_txt: + save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) + if save_json: + pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), + shape, shapes[si][1]) + save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary + callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) + + # Plot images + if plots and batch_i < 3: + if masks.shape[1:] != im.shape[2:]: + masks = F.interpolate( + masks.unsqueeze(0).float(), + im.shape[2:], + mode="bilinear", + align_corners=False, + ).squeeze(0) + plot_images_and_masks(im, targets, masks, paths, + save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels + plot_masks = torch.cat(plot_masks, dim=0) + plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, + save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + + callbacks.run('on_val_batch_end') + + # Compute metrics + stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy + if len(stats) and stats[0].any(): + results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) + metrics.update(results) + nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class + else: + nt = torch.zeros(1) + + # Print results + pf = '%20s' + '%11i' * 2 + '%11.3g' * 8 # print format + LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) + + # Print results per class + if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): + for i, c in enumerate(metrics.ap_class_index): + LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) + + # Print speeds + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + if not training: + shape = (batch_size, 3, imgsz, imgsz) + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) + + # Plots + if plots: + confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) + callbacks.run('on_val_end') + + # unpack mean results now; pycocotools may overwrite the mAP values below + ( + mp_bbox, + mr_bbox, + map50_bbox, + map_bbox, + mp_mask, + mr_mask, + map50_mask, + map_mask, + ) = metrics.mean_results() + # Save JSON + if save_json and len(jdict): + w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights + anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json + pred_json = str(save_dir / f"{w}_predictions.json") # predictions json + LOGGER.info(f'\nEvaluating pycocotools mAP... 
saving {pred_json}...') + with open(pred_json, 'w') as f: + json.dump(jdict, f) + + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + check_requirements(['pycocotools']) + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval_bbox = COCOeval(anno, pred, 'bbox') + eval_mask = COCOeval(anno, pred, 'segm') + if is_coco: + eval_bbox.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + eval_mask.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + eval_bbox.evaluate() + eval_bbox.accumulate() + eval_bbox.summarize() + map_bbox, map50_bbox = eval_bbox.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + + eval_mask.evaluate() + eval_mask.accumulate() + eval_mask.summarize() + map_mask, map50_mask = eval_mask.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + except Exception as e: + LOGGER.info(f'pycocotools unable to run: {e}') + + # Return results + model.float() # for training + if not training: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + final_metric = ( + mp_bbox, + mr_bbox, + map50_bbox, + map_bbox, + mp_mask, + mr_mask, + map50_mask, + map_mask, + ) + return ((*final_metric, *(loss.cpu() / len(dataloader)).tolist()), + metrics.get_maps(nc), t,) def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('-d', '--data', type=str, default='data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('-w', '--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') - parser.add_argument('-b', '--batch-size', type=int, default=32, help='batch size') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--verbose', action='store_true', help='report mAP by class') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') - parser.add_argument('--nosave', action='store_true', help='do not save anything.') - parser.add_argument('--project', default='runs/val', help='save to project/name') + parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') - parser.add_argument('--overlap-mask', action='store_true', help='Eval overlapping masks') - + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML opt.save_json |= opt.data.endswith('coco.yaml') + opt.save_txt |= opt.save_hybrid print_args(vars(opt)) return opt + def main(opt): - set_logging() - check_requirements(exclude=("tensorboard", "thop")) - evaluator = Yolov5Evaluator( - data=opt.data, - conf_thres=opt.conf_thres, - iou_thres=opt.iou_thres, - device=opt.device, - single_cls=opt.single_cls, - augment=opt.augment, - verbose=opt.verbose, - project=opt.project, - name=opt.name, - exist_ok=opt.exist_ok, - half=opt.half, - mask=True, - nosave=opt.nosave, - overlap=opt.overlap_mask, - ) + check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop')) + + if opt.task in ('train', 'val', 'test'): # run normally + if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 + LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')) + run(**vars(opt)) - if opt.task in ("train", "val", "test"): # run normally - evaluator.run( - weights=opt.weights, - batch_size=opt.batch_size, - imgsz=opt.imgsz, - save_txt=opt.save_txt, - save_conf=opt.save_conf, - save_json=opt.save_json, - task=opt.task, - ) else: - raise ValueError(f"not support task {opt.task}") + weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] + opt.half = True # FP16 for fastest results + if opt.task == 'speed': # speed benchmarks + # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt... + opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False + for opt.weights in weights: + run(**vars(opt), plots=False) + + elif opt.task == 'study': # speed vs mAP benchmarks + # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt... 
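Besides the CLI tasks above, run() is a plain function and can also be called programmatically. A minimal sketch; the module name and the dataset/checkpoint paths are illustrative assumptions, not names fixed by this patch:

```python
# Hedged sketch: invoke the segmentation validator from Python.
import val  # assumption: whatever module this script is saved as

(results, maps, times) = val.run(
    data='data/coco128.yaml',  # dataset yaml (assumed to exist)
    weights='yolov5s-seg.pt',  # hypothetical segmentation checkpoint
    imgsz=640,
    conf_thres=0.001,          # keep low so mAP covers the full PR curve
    iou_thres=0.6,
)
# run() returns 8 mean metrics (box P, R, mAP@.5, mAP@.5:.95, then the
# mask equivalents) followed by the averaged validation losses
mp_b, mr_b, map50_b, map_b, mp_m, mr_m, map50_m, map_m = results[:8]
print(f'box mAP@0.5:0.95 {map_b:.3f} | mask mAP@0.5:0.95 {map_m:.3f}')
```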
+ for opt.weights in weights: + f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to + x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis + for opt.imgsz in x: # img-size + LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') + r, _, t = run(**vars(opt), plots=False) + y.append(r + t) # results and times + np.savetxt(f, y, fmt='%10.4g') # save + os.system('zip -r study.zip study_*.txt') + plot_val_study(x=x) # plot if __name__ == "__main__": diff --git a/segment/val_new.py b/segment/val_new.py deleted file mode 100644 index 033dec732bd8..000000000000 --- a/segment/val_new.py +++ /dev/null @@ -1,459 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Validate a trained YOLOv5 model accuracy on a custom dataset - -Usage: - $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640 - -Usage - formats: - $ python path/to/val.py --weights yolov5s.pt # PyTorch - yolov5s.torchscript # TorchScript - yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn - yolov5s.xml # OpenVINO - yolov5s.engine # TensorRT - yolov5s.mlmodel # CoreML (macOS-only) - yolov5s_saved_model # TensorFlow SavedModel - yolov5s.pb # TensorFlow GraphDef - yolov5s.tflite # TensorFlow Lite - yolov5s_edgetpu.tflite # TensorFlow Edge TPU -""" - -import argparse -import json -import os -import sys -from pathlib import Path - -import numpy as np -import torch -from tqdm import tqdm - -FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] # YOLOv5 root directory -if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative - -import torch.nn.functional as F -import pycocotools.mask as mask_util -from models.common import DetectMultiBackend -from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader -from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, - scale_coords, xywh2xyxy, xyxy2xywh, de_parallel) -from utils.segment.general import non_max_suppression_masks, process_mask_upsample, mask_iou, scale_masks -from utils.metrics import ConfusionMatrix, ap_per_class, box_iou -from utils.segment.metrics import ap_per_class_box_and_mask, Metrics -from utils.plots import output_to_target, plot_images, plot_val_study -from utils.torch_utils import select_device, time_sync - - -def save_one_txt(predn, save_conf, shape, file): - # Save one txt result - gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh - for *xyxy, conf, cls in predn.tolist(): - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(file, 'a') as f: - f.write(('%g ' * len(line)).rstrip() % line + '\n') - - -def save_one_json(predn, jdict, path, class_map, pred_masks): - # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = xyxy2xywh(predn[:, :4]) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - - pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") - - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): 
- pred_dict = { - 'image_id': image_id, - 'category_id': class_map[int(p[5])], - 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)} - pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) - - -def process_batch(detections, labels, iouv): - """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. - Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - correct (Array[N, 10]), for 10 IoU levels - """ - correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool) - iou = box_iou(labels[:, 1:], detections[:, :4]) - correct_class = labels[:, 0:1] == detections[:, 5] - for i in range(len(iouv)): - x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match - if x[0].shape[0]: - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - correct[matches[:, 1].astype(int), i] = True - return torch.tensor(correct, dtype=torch.bool, device=iouv.device) - - -def process_batch_masks(self, predn, pred_masks, gt_masks, labels, iouv, overlap): - correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) - # convert masks (1, 640, 640) -> (n, 640, 640) - if overlap: - nl = len(labels) - index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 - gt_masks = gt_masks.repeat(nl, 1, 1) - gt_masks = torch.where(gt_masks == index, 1.0, 0.0) - - if gt_masks.shape[1:] != pred_masks.shape[1:]: - gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear", - align_corners=False, ).squeeze(0) - - iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1), ) - x = torch.where( - (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(self.iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv - return correct - - -@torch.no_grad() -def run( - data, - weights=None, # model.pt path(s) - batch_size=32, # batch size - imgsz=640, # inference size (pixels) - conf_thres=0.001, # confidence threshold - iou_thres=0.6, # NMS IoU threshold - task='val', # train, val, test, speed or study - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - workers=8, # max dataloader workers (per RANK in DDP mode) - single_cls=False, # treat as single-class dataset - augment=False, # augmented inference - verbose=False, # verbose output - save_txt=False, # save results to *.txt - save_hybrid=False, # save label+prediction hybrid results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_json=False, # save a COCO-JSON results file - project=ROOT / 'runs/val', # save to project/name - name='exp', # save to project/name - exist_ok=False, # existing project/name ok, do not increment - half=True, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - model=None, - dataloader=None, - save_dir=Path(''), - plots=True, - overlap=False, - mask_downsample_ratio=1, - callbacks=Callbacks(), - compute_loss=None, -): - # Initialize/load model and set device - training = model is not None - if training: # called by train.py - device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model - half &= device.type != 'cpu' # half precision only supported on CUDA - model.half() if half else model.float() - else: # called directly - device = select_device(device, batch_size=batch_size) - - # Directories - save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir - - # Load model - model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) - stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine - imgsz = check_img_size(imgsz, s=stride) # check image size - half = model.fp16 # FP16 supported on limited backends with CUDA - if engine: - batch_size = model.batch_size - else: - device = model.device - if not (pt or jit): - batch_size = 1 # export.py models default to batch-size 1 - LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') - - # Data - data = check_dataset(data) # check - - # Configure - model.eval() - cuda = device.type != 'cpu' - is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset - nc = 1 if single_cls else int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 - niou = iouv.numel() - - # Dataloader - if not training: - if pt and not single_cls: # check --weights are trained on --data - ncm = model.model.nc - assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ - f'classes). Pass correct combination of --weights and --data that are trained together.' 
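The `correct` matrix built by process_batch above has one column per IoU threshold; a self-contained sketch of how a single class-matched prediction fills its row:

```python
import torch

iouv = torch.linspace(0.5, 0.95, 10)  # the 10 thresholds behind mAP@0.5:0.95
matched_iou = 0.72                    # assumed IoU of one class-matched prediction
print(matched_iou >= iouv)
# tensor([ True,  True,  True,  True,  True, False, False, False, False, False])
# i.e. the detection is a TP for mAP@0.5..0.70 but not for 0.75..0.95
```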
- model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup - pad = 0.0 if task in ('speed', 'benchmark') else 0.5 - rect = False if task == 'benchmark' else pt # square inference for benchmarks - task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], - imgsz, - batch_size, - stride, - single_cls, - pad=pad, - rect=rect, - workers=workers, - prefix=colorstr(f'{task}: '), - mask_downsample_ratio=1, - overlap_mask=overlap, - mask_downsample_ratio=mask_downsample_ratio)[0] - - seen = 0 - confusion_matrix = ConfusionMatrix(nc=nc) - names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} - class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", - "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}") - dt = [0.0, 0.0, 0.0] - metrics = Metrics() - loss = torch.zeros(4, device=device) - jdict, stats = [], [] - callbacks.run('on_val_start') - pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar - for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): - callbacks.run('on_val_batch_start') - t1 = time_sync() - if cuda: - im = im.to(device, non_blocking=True) - targets = targets.to(device) - masks = masks.to(device).float() - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - nb, _, height, width = im.shape # batch size, channels, height, width - t2 = time_sync() - dt[0] += t2 - t1 - - # Inference - out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs - dt[1] += time_sync() - t2 - - # Loss - if compute_loss: - loss += compute_loss([x.float() for x in train_out], targets, masks)[1] # box, obj, cls - - # NMS - targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels - lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling - t3 = time_sync() - out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, - mask_dim=de_parallel(model).model[-1].mask_dim) - dt[2] += time_sync() - t3 - - # Metrics - for si, pred in enumerate(out): - labels = targets[targets[:, 0] == si, 1:] - midx = [si] if overlap else targets[:, 0] == si - gt_masks = masks[midx] - proto_out = train_out[1][si] - pred_masks = process_mask_upsample(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() - - nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions - path, shape = Path(paths[si]), shapes[si][0] - correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init - correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init - seen += 1 - - if npr == 0: - if nl: - stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) - continue - - # Predictions - if single_cls: - pred[:, 5] = 0 - predn = pred.clone() - scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred - - # Evaluate - if nl: - tbox = xywh2xyxy(labels[:, 1:5]) # target boxes - scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels - labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels - correct_bboxes = process_batch(predn, labelsn, iouv) - correct_masks = 
process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) - if plots: - confusion_matrix.process_batch(predn, labelsn) - stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) - - # Save/log - if save_txt: - save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) - if save_json: - pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, shapes[si][1]) - save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) - - # Plot images - if plots and batch_i < 3: - # TODO: plot with masks - plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels - plot_images(im, output_to_target(out), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred - - callbacks.run('on_val_batch_end') - - # Compute metrics - stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy - if len(stats) and stats[0].any(): - results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) - metrics.update(results) - # ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 - # mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() - nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class - else: - nt = torch.zeros(1) - - # Print results - pf = '%20s' + '%11i' * 2 + '%11.3g' * 8 # print format - LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) - - # Print results per class - if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): - for i, c in enumerate(metrics.ap_class_index): - LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) - - # Print speeds - t = tuple(x / seen * 1E3 for x in dt) # speeds per image - if not training: - shape = (batch_size, 3, imgsz, imgsz) - LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) - - # Plots - if plots: - confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - callbacks.run('on_val_end') - - # Save JSON - if save_json and len(jdict): - w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights - anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json - pred_json = str(save_dir / f"{w}_predictions.json") # predictions json - LOGGER.info(f'\nEvaluating pycocotools mAP... 
saving {pred_json}...') - with open(pred_json, 'w') as f: - json.dump(jdict, f) - - try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb - check_requirements(['pycocotools']) - from pycocotools.coco import COCO - from pycocotools.cocoeval import COCOeval - - anno = COCO(anno_json) # init annotations api - pred = anno.loadRes(pred_json) # init predictions api - eval = COCOeval(anno, pred, 'bbox') - if is_coco: - eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate - eval.evaluate() - eval.accumulate() - eval.summarize() - # TODO: update these to metrics - map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) - except Exception as e: - LOGGER.info(f'pycocotools unable to run: {e}') - - # Return results - model.float() # for training - if not training: - s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") - return ((*metrics.mean_results(), *(loss.cpu() / len(dataloader)).tolist()), - metrics.get_maps(nc), t,) - # maps = np.zeros(nc) + map - # for i, c in enumerate(ap_class): - # maps[c] = ap[i] - # return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t - - -def parse_opt(): - parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') - parser.add_argument('--batch-size', type=int, default=32, help='batch size') - parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') - parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') - parser.add_argument('--task', default='val', help='train, val, test, speed or study') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') - parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--verbose', action='store_true', help='report mAP by class') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') - parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') - parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') - parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') - opt = parser.parse_args() - opt.data = check_yaml(opt.data) # check YAML - opt.save_json |= opt.data.endswith('coco.yaml') - opt.save_txt |= opt.save_hybrid - print_args(vars(opt)) - return opt - - -def main(opt): - check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop')) - - if opt.task in ('train', 'val', 'test'): # run normally - if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 - LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')) - run(**vars(opt)) - - else: - weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] - opt.half = True # FP16 for fastest results - if opt.task == 'speed': # speed benchmarks - # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt... - opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False - for opt.weights in weights: - run(**vars(opt), plots=False) - - elif opt.task == 'study': # speed vs mAP benchmarks - # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt... 
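For reference, the pycocotools evaluation wired in above reduces to a handful of calls; a sketch assuming COCO-format annotation and prediction files are already on disk (both paths are assumptions):

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

anno = COCO('annotations/instances_val2017.json')  # ground-truth annotations
pred = anno.loadRes('exp_predictions.json')        # boxes (and RLE masks) from save_one_json
for iou_type in ('bbox', 'segm'):                  # box mAP, then mask mAP
    ev = COCOeval(anno, pred, iou_type)
    ev.evaluate()
    ev.accumulate()
    ev.summarize()
    map_all, map50 = ev.stats[:2]                  # mAP@0.5:0.95, mAP@0.5
```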
- for opt.weights in weights: - f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to - x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis - for opt.imgsz in x: # img-size - LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') - r, _, t = run(**vars(opt), plots=False) - y.append(r + t) # results and times - np.savetxt(f, y, fmt='%10.4g') # save - os.system('zip -r study.zip study_*.txt') - plot_val_study(x=x) # plot - - -if __name__ == "__main__": - opt = parse_opt() - main(opt) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 8e670a86b1b1..6e3696718b6b 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -3,7 +3,6 @@ Logging utils """ -from ast import Import import os import warnings @@ -13,7 +12,8 @@ from utils.general import colorstr, cv2, emojis from utils.loggers.wandb.wandb_utils import WandbLogger -from utils.plots import plot_images, plot_results, plot_results_with_masks, plot_images_and_masks +from utils.plots import plot_images, plot_results +from utils.segment.plots import plot_results_with_masks, plot_images_and_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases @@ -158,7 +158,7 @@ def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) - def on_train_end(self, last, best, plots, epoch, results, masks=False): + def on_train_end(self, last, best, plots, epoch, results): # Callback runs on training end # plot_results = plot_results_with_masks if masks else plot_results if plots: @@ -188,174 +188,10 @@ def on_params_update(self, params): if self.wandb: self.wandb.wandb_run.config.update(params, allow_val_change=True) -from threading import Thread - -class NewLoggers: - """Loggers without wandb, cause I don't really use `wandb` and `wandb` related codes are noisy.""" - def __init__( - self, - save_dir=None, - opt=None, - logger=None, - include=LOGGERS, - ): - self.save_dir = save_dir - self.opt = opt - self.logger = logger # for printing results to console - self.include = include - self.keys = [ - "train/box_loss", - "train/obj_loss", - "train/cls_loss", # train loss - "metrics/precision", - "metrics/recall", - "metrics/mAP_0.5", - "metrics/mAP_0.5:0.95", # metrics - "val/box_loss", - "val/obj_loss", - "val/cls_loss", # val loss - "x/lr0", - "x/lr1", - "x/lr2", - ] # params - self.best_keys = [ - "best/epoch", - "best/precision", - "best/recall", - "best/mAP_0.5", - "best/mAP_0.5:0.95", - ] - for k in LOGGERS: - setattr(self, k, None) # init empty logger dictionary - self.csv = True # always log to csv - - # TensorBoard - s = self.save_dir - if "tb" in self.include and s.exists(): - prefix = colorstr("TensorBoard: ") - self.logger.info( - f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/" - ) - self.tb = SummaryWriter(str(s)) - try: - import wandb - from wandb import __version__ - wandb.init(project=opt.project, name=opt.name, config=opt) - except ImportError: - wandb = None - pass - self.wandb = wandb - - def on_pretrain_routine_end(self): - pass - def on_train_batch_end( - self, ni, model, imgs, targets, masks, paths, plots, sync_bn, plot_idx - ): - # Callback runs on train batch end - if plots and self.save_dir.exists(): - if ni == 0: - if ( - not sync_bn - ): # tb.add_graph() 
--sync known issue https://github.com/ultralytics/yolov5/issues/3754 - with warnings.catch_warnings(): - warnings.simplefilter("ignore") # suppress jit trace warning - self.tb.add_graph( - torch.jit.trace( - de_parallel(model), imgs[0:1], strict=False - ), - [], - ) - if plot_idx is not None and ni in plot_idx: - f = self.save_dir / f"train_batch{ni}.jpg" # filename - Thread( - target=plot_images, args=(imgs, targets, paths, f), daemon=True - ).start() - # if ni < 3: - # f = self.save_dir / f'train_batch{ni}.jpg' # filename - # Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() - - def on_train_epoch_end(self, epoch): - # Callback runs on train epoch end - pass - - def on_val_image_end(self, imgs, targets, masks, paths): - # Callback runs on val image end - pass - - def on_val_end(self): - # Callback runs on val end - pass - - def on_fit_epoch_end(self, vals, epoch): - # Callback runs at the end of each fit (train+val) epoch - x = {k: v for k, v in zip(self.keys, vals)} # dict - if self.csv and self.save_dir.exists(): - file = self.save_dir / "results.csv" - n = len(x) + 1 # number of cols - s = ( - "" - if file.exists() - else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") - ) # add header - with open(file, "a") as f: - f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") - - if self.tb: - for k, v in x.items(): - self.tb.add_scalar(k, v, epoch) - if self.wandb: - wandb.log(x) - - def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): - # Callback runs on model save event - pass - - def on_train_end(self, plots, epoch, masks=False): - plts = plot_results_with_masks if masks else plot_results - # Callback runs on training end - if plots and self.save_dir.exists(): - plts(file=self.save_dir / "results.csv") # save results.png - files = [ - "results.png", - "confusion_matrix.png", - *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R")), - ] - files = [ - (self.save_dir / f) for f in files if (self.save_dir / f).exists() - ] # filter - - if self.tb: - import cv2 - - for f in files: - self.tb.add_image( - f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats="HWC" - ) - if self.wandb: - best = self.save_dir/ "weights" / "best.pt" - last = self.save_dir / "weights" / "last.pt" - wandb.log_artifact(str(best if best.exists() else last), - type='model', - name=f'run_{self.wandb.run.id}_model', - aliases=['latest', 'best', 'stripped']) - self.wandb.finish() - - - def on_params_update(self): - # Update hyperparams or configs of the experiment - # params: A dict containing {param: value} pairs - pass - -class NewLoggersMask(NewLoggers): - def __init__( - self, - save_dir=None, - opt=None, - logger=None, - include=LOGGERS, - ): - super().__init__(save_dir, opt, logger, include) +class LoggersMask(Loggers): + def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): + super().__init__(save_dir, weights, opt, hyp, logger, include) self.keys = [ "train/box_loss", "train/seg_loss", # train loss @@ -379,50 +215,73 @@ def __init__( ] # params self.best_keys = [ "best/epoch", - "best/precision", - "best/recall", - "best/mAP_0.5", - "best/mAP_0.5:0.95", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)", ] - def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots, sync_bn): - # Callback runs on train batch end - if plots and 
self.save_dir.exists(): + def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots): + if plots: if ni == 0: - if ( - not sync_bn - ): # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 + if self.tb and not self.opt.sync_bn: # --sync known issue https://github.com/ultralytics/yolov5/issues/3754 with warnings.catch_warnings(): - warnings.simplefilter("ignore") # suppress jit trace warning - self.tb.add_graph( - torch.jit.trace( - de_parallel(model), imgs[0:1], strict=False - ), - [], - ) + warnings.simplefilter('ignore') # suppress jit trace warning + self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) if ni < 3: f = self.save_dir / f"train_batch{ni}.jpg" # filename plot_images_and_masks(imgs, targets, masks, paths, f) - + if self.wandb and ni == 10: + files = sorted(self.save_dir.glob('train*.jpg')) + self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) - def on_fit_epoch_end(self, vals, epoch): + def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): # Callback runs at the end of each fit (train+val) epoch - x = {k: v for k, v in zip(self.keys, vals)} # dict - if self.csv and self.save_dir.exists(): - file = self.save_dir / "results.csv" + x = dict(zip(self.keys, vals)) + if self.csv: + file = self.save_dir / 'results.csv' n = len(x) + 1 # number of cols - s = ( - "" - if file.exists() - else (("%20s," * n % tuple(["epoch"] + self.keys)).rstrip(",") + "\n") - ) # add header - with open(file, "a") as f: - f.write(s + ("%20.5g," * n % tuple([epoch] + vals)).rstrip(",") + "\n") + s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header + with open(file, 'a') as f: + f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') if self.tb: for k, v in x.items(): self.tb.add_scalar(k, v, epoch) + + if self.wandb: + if best_fitness == fi: + best_results = [epoch] + vals[4:12] + for i, name in enumerate(self.best_keys): + self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary + self.wandb.log(x) + self.wandb.end_epoch(best_result=best_fitness == fi) + + def on_train_end(self, last, best, plots, epoch, results): + # Callback runs on training end + if plots: + plot_results_with_masks(file=self.save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter + self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}") + + if self.tb: + for f in files: + self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') + if self.wandb: - wandb.log(x, step=epoch, commit=True) + self.wandb.log(dict(zip(self.keys[4:16], results))) + self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) + # Calling wandb.log. 
TODO: Refactor this into WandbLogger.log_model + if not self.opt.evolve: + wandb.log_artifact(str(best if best.exists() else last), + type='model', + name=f'run_{self.wandb.wandb_run.id}_model', + aliases=['latest', 'best', 'stripped']) + self.wandb.finish_run() diff --git a/utils/plots.py b/utils/plots.py index 49d014abd9e0..1747a83284e4 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -644,62 +644,6 @@ def result(self): # Return annotated image as array return np.asarray(self.im) -class Visualizer(object): - """Visualization of one model.""" - def __init__(self, names) -> None: - super().__init__() - self.names = names - - def draw_one_img(self, img, output, vis_conf=0.4): - """Visualize one images. - - Args: - imgs (numpy.ndarray): one image. - outputs (torch.Tensor): one output, (num_boxes, classes+5) - vis_confs (float, optional): Visualize threshold. - Return: - img (numpy.ndarray): Image after visualization. - """ - if isinstance(output, list): - output = output[0] - if output is None or len(output) == 0: - return img - for (*xyxy, conf, cls) in reversed(output[:, :6]): - if conf < vis_conf: - continue - label = '%s %.2f' % (self.names[int(cls)], conf) - color = colors(int(cls)) - plot_one_box(xyxy, img, label=label, - color=color, - line_thickness=2) - return img - - def draw_multi_img(self, imgs, outputs, vis_confs=0.4): - """Visualize multi images. - - Args: - imgs (List[numpy.array]): multi images. - outputs (List[torch.Tensor]): multi outputs, List[num_boxes, classes+5]. - vis_confs (float | tuple[float], optional): Visualize threshold. - Return: - imgs (List[numpy.ndarray]): Images after visualization. - """ - if isinstance(vis_confs, float): - vis_confs = list(repeat(vis_confs, len(imgs))) - assert len(imgs) == len(outputs) == len(vis_confs) - for i, output in enumerate(outputs): # detections per image - self.draw_one_img(imgs[i], output, vis_confs[i]) - return imgs - - def draw_imgs(self, imgs, outputs, vis_confs=0.4): - if isinstance(imgs, np.ndarray): - return self.draw_one_img(imgs, outputs, vis_confs) - else: - return self.draw_multi_img(imgs, outputs, vis_confs) - - def __call__(self, imgs, outputs, vis_confs=0.4): - return self.draw_imgs(imgs, outputs, vis_confs) - def hist2d(x, y, n=100): # 2d histogram used in labels.png and evolve.png @@ -1078,52 +1022,6 @@ def plot_results(file="path/to/results.csv", dir="", best=True): plt.close() -def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 
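The plot_results_with_masks function removed below (re-added under utils/segment/plots.py later in this patch) picks the "best" epoch with the same 0.1/0.9 weighting of mAP@.5 and mAP@.5:.95 applied to both box and mask metrics. A toy sketch of that selection, assuming the results.csv column layout indexed in the code:

```python
import numpy as np

# Sketch: best-epoch selection, assuming columns 7/8 hold box mAP@.5/mAP@.5:.95
# and columns 11/12 the mask equivalents; fake data stands in for results.csv.
log = np.random.rand(50, 17)  # 50 epochs x 17 logged columns
fitness = (0.1 * log[:, 7] + 0.9 * log[:, 8] +
           0.1 * log[:, 11] + 0.9 * log[:, 12])
best_epoch = int(np.argmax(fitness))
```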
- for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax( - 0.9 * data.values[:, 8] - + 0.1 * data.values[:, 7] - + 0.9 * data.values[:, 12] - + 0.1 * data.values[:, 11], - ) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter( - index, - y[index], - color="r", - label=f"best:{index}", - marker="*", - linewidth=3, - ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") - # if j in [8, 9, 10]: # share train and val loss y axes - # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - def plot_one_box(x, img, color=None, label=None, line_thickness=None): import random @@ -1178,218 +1076,3 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec print(f"Saving {save_dir / f}... ({n}/{channels})") plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") plt.close() - - -def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, -): - # Plot image grid with labels - # print("targets:", targets.shape) - # print("masks:", masks.shape) - # print('--------------------------') - - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if isinstance(masks, torch.Tensor): - masks = masks.cpu().numpy() - masks = masks.astype(int) - - # un-normalise - if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y : block_y + h, block_x : block_x + w, :] = img - if len(targets) > 0: - idx = (targets[:, 0]).astype(int) - image_targets = targets[idx == i] - - if masks.max() > 1.0: # mean that masks are overlap - image_masks = masks[[i]] # (1, 640, 640) - # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(image_targets) - index = np.arange(nl).reshape(nl, 1, 1) + 1 - image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) - else: - image_masks = masks[idx == i] - - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype("int") - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = ( - None if labels else image_targets[:, 6] - ) # check for confidence presence (label vs pred) - - if 
boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) - color = colors(cls) - cls = names[cls] if names else cls - mask = image_masks[j].astype(np.bool) - # print(mask.shape) - # print(mosaic.shape) - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[ - block_y : block_y + h, block_x : block_x + w, : - ][mask] * 0.35 + (np.array(color) * 0.65) - - # Draw image filename labels - if paths: - label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText( - mosaic, - label, - (block_x + 5, block_y + t_size[1] + 5), - 0, - tl / 3, - [220, 220, 220], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - # Image border - cv2.rectangle( - mosaic, - (block_x, block_y), - (block_x + w, block_y + h), - (255, 255, 255), - thickness=3, - ) - - if fname: - r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize( - mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA - ) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - with Image.fromarray(mosaic) as im: - im.save(fname) - return mosaic - - -def plot_images_boxes_and_masks( - images, - targets, - masks=None, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, -): - if masks is not None: - return plot_images_and_masks(images, targets, masks, paths, fname, names, max_size, max_subplots) - else: - return plot_images(images, targets, paths, fname, names, max_size, max_subplots) - - -def plot_masks(img, masks, colors, alpha=0.5): - """ - Args: - img (tensor): img on cuda, shape: [3, h, w], range: [0, 1] - masks (tensor): predicted masks on cuda, shape: [n, h, w] - colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - Return: - img after draw masks, shape: [h, w, 3] - - transform colors and send img_gpu to cpu for the most time. - """ - img_gpu = img.clone() - num_masks = len(masks) - # [n, 1, 1, 3] - # faster this way to transform colors - colors = torch.tensor(colors, device=img.device).float() / 255.0 - colors = colors[:, None, None, :] - # [n, h, w, 1] - masks = masks[:, :, :, None] - masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha - inv_alph_masks = masks * (-alpha) + 1 - masks_color_summand = masks_color[0] - if num_masks > 1: - inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) - masks_color_cumul = masks_color[1:] * inv_alph_cumul - masks_color_summand += masks_color_cumul.sum(dim=0) - - # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] - img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv - img_gpu = img_gpu.permute(1, 2, 0).contiguous() - # [h, w, 3] - img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand - return (img_gpu * 255).byte().cpu().numpy() - -def visualize(self, images, outputs, out_masks, vis_confs=0.4): - """Image visualize - if images is a List of ndarray, then will return a List. - if images is a ndarray, then return ndarray. - Args: - outputs: bbox+conf+cls, List[torch.Tensor(num_boxes, 6)]xB. 
- masks: binary masks, List[torch.Tensor(num_boxes, img_h, img_w)]xB. - """ - ori_type = type(images) - # get original shape, cause self.ori_hw will be cleared - images = images if isinstance(images, list) else [images] - ori_hw = [img.shape[:2] for img in images] - # init the list to keep image with masks. - # TODO: fix this bug when output is empty. - masks_images = [] - # draw masks - for i, output in enumerate(outputs): - if output is None or len(output) == 0: - continue - idx = output[:, 4] > vis_confs - masks = out_masks[i][idx] - mcolors = [colors(int(cls)) for cls in output[:, 5]] - # NOTE: this way to draw masks is faster, - # from https://github.com/dbolya/yolact - # image with masks, (img_h, img_w, 3) - img_masks = plot_masks(self.imgs[i], masks, mcolors) - # scale image to original hw - from utils.segment import scale_masks - img_masks = scale_masks(self.imgs[i].shape[1:], img_masks, ori_hw[i]) - masks_images.append(img_masks) - # TODO: make this(ori_type stuff) clean - images = masks_images[0] if (len(masks_images) == 1) and type(masks_images) != ori_type else images[0] - return self.vis(images, outputs, vis_confs) diff --git a/utils/segment/general.py b/utils/segment/general.py index 70056a4bbb31..d24b263bcc59 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -7,7 +7,7 @@ import torchvision from ..general import xywh2xyxy -from .metrics import box_iou +from ..metrics import box_iou def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index 602623377402..a3c0acd23920 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -8,13 +8,10 @@ from ..metrics import ap_per_class -def fitness(x, masks=False): +def fitness(x): # Model fitness as a weighted combination of metrics - if masks: - w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] - return (x[:, :8] * w).sum(1) - w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): @@ -30,9 +27,9 @@ def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False prefix="Mask")[2:] results = edict({ - "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[2], "f1": results_boxes[3], + "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[3], "f1": results_boxes[2], "ap_class": results_boxes[4]}, - "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[2], "f1": results_masks[3], + "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[3], "f1": results_masks[2], "ap_class": results_masks[4]}}) return results diff --git a/utils/segment/plots.py b/utils/segment/plots.py new file mode 100644 index 000000000000..77fb983fe8d1 --- /dev/null +++ b/utils/segment/plots.py @@ -0,0 +1,353 @@ +import cv2 +import torch +import math +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +from pathlib import Path +from PIL import Image + +from ..plots import colors, Annotator +from ..general import xywh2xyxy + + +def plot_masks(img, masks, colors, alpha=0.5): + """ + Args: + img (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * 
n]
+    Return:
+        ndarray: img after draw masks, shape: [h, w, 3]
+
+    Colors are transformed up front and img_gpu is moved to the CPU only once
+    at the end, which is faster.
+    """
+    img_gpu = img.clone()
+    num_masks = len(masks)
+    if num_masks == 0:
+        # keep dtype consistent with the masked path below
+        return (img.permute(1, 2, 0).contiguous().cpu().numpy() * 255).astype(np.uint8)
+
+    # [n, 1, 1, 3]
+    # faster this way to transform colors
+    colors = torch.tensor(colors, device=img.device).float() / 255.0
+    colors = colors[:, None, None, :]
+    # [n, h, w, 1]
+    masks = masks[:, :, :, None]
+    masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha
+    inv_alph_masks = masks * (-alpha) + 1
+    masks_color_summand = masks_color[0]
+    if num_masks > 1:
+        inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0)
+        masks_color_cumul = masks_color[1:] * inv_alph_cumul
+        masks_color_summand += masks_color_cumul.sum(dim=0)
+
+    # print(inv_alph_masks.prod(dim=0).shape)  # [h, w, 1]
+    img_gpu = img_gpu.flip(dims=[0])  # flip channel for opencv
+    img_gpu = img_gpu.permute(1, 2, 0).contiguous()
+    # [h, w, 3]
+    img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
+    return (img_gpu * 255).byte().cpu().numpy()
+
+def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+    import random
+
+    # Plots one bounding box on image img
+    tl = (
+        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
+    )  # line/font thickness
+    color = color or [random.randint(0, 255) for _ in range(3)]
+    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
+    if label:
+        tf = max(tl - 1, 1)  # font thickness
+        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
+        cv2.putText(
+            img,
+            label,
+            (c1[0], c1[1] - 2),
+            0,
+            tl / 3,
+            [225, 255, 255],
+            thickness=tf,
+            lineType=cv2.LINE_AA,
+        )
+
+def plot_images_and_masks(
+    images,
+    targets,
+    masks,
+    paths=None,
+    fname="images.jpg",
+    names=None,
+    max_size=640,
+    max_subplots=16,
+):
+    if isinstance(images, torch.Tensor):
+        images = images.cpu().float().numpy()
+    if isinstance(targets, torch.Tensor):
+        targets = targets.cpu().numpy()
+    if isinstance(masks, torch.Tensor):
+        masks = masks.cpu().numpy()
+        masks = masks.astype(int)
+
+    # un-normalise
+    if np.max(images[0]) <= 1:
+        images *= 255
+
+    tl = 3  # line thickness
+    tf = max(tl - 1, 1)  # font thickness
+    bs, _, h, w = images.shape  # batch size, _, height, width
+    bs = min(bs, max_subplots)  # limit plot images
+    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
+
+    # Check if we should resize
+    scale_factor = max_size / max(h, w)
+    if scale_factor < 1:
+        h = math.ceil(scale_factor * h)
+        w = math.ceil(scale_factor * w)
+
+    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
+    for i, img in enumerate(images):
+        if i == max_subplots:  # if last batch has fewer images than we expect
+            break
+
+        block_x = int(w * (i // ns))
+        block_y = int(h * (i % ns))
+
+        img = img.transpose(1, 2, 0)
+        if scale_factor < 1:
+            img = cv2.resize(img, (w, h))
+
+        mosaic[block_y : block_y + h, block_x : block_x + w, :] = img
+        if len(targets) > 0:
+            idx = (targets[:, 0]).astype(int)
+            image_targets = targets[idx == i]
+
+            if masks.max() > 1.0:  # means that masks overlap
+                image_masks = masks[[i]]  # (1, 640, 640)
+                # convert masks (1, 640, 640) -> (n, 640, 640)
+                nl = len(image_targets)
+                index = np.arange(nl).reshape(nl, 1, 1) + 1
+                image_masks = np.repeat(image_masks, nl, axis=0)
+                image_masks = np.where(image_masks == index, 1.0, 0.0)
+            else:
+                image_masks = masks[idx == i]
+
+            boxes = xywh2xyxy(image_targets[:, 2:6]).T
+            classes = image_targets[:, 1].astype("int")
+            labels = image_targets.shape[1] == 6  # labels if no conf column
+            conf = (
+                None if labels else image_targets[:, 6]
+            )  # check for confidence presence (label vs pred)
+
+            if boxes.shape[1]:
+                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
+                    boxes[[0, 2]] *= w  # scale to pixels
+                    boxes[[1, 3]] *= h
+                elif scale_factor < 1:  # absolute coords need scale if image scales
+                    boxes *= scale_factor
+            boxes[[0, 2]] += block_x
+            boxes[[1, 3]] += block_y
+            for j, box in enumerate(boxes.T):
+                cls = int(classes[j])
+                color = colors(cls)
+                cls = names[cls] if names else cls
+                if scale_factor < 1:
+                    mask = image_masks[j].astype(np.uint8)
+                    mask = cv2.resize(mask, (w, h))
+                    mask = mask.astype(bool)  # np.bool is deprecated
+                else:
+                    mask = image_masks[j].astype(bool)  # np.bool is deprecated
+                if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                    label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j])
+                    plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
+                    mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[
+                        block_y : block_y + h, block_x : block_x + w, :
+                    ][mask] * 0.35 + (np.array(color) * 0.65)
+
+        # Draw image filename labels
+        if paths:
+            label = Path(paths[i]).name[:40]  # trim to 40 char
+            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+            cv2.putText(
+                mosaic,
+                label,
+                (block_x + 5, block_y + t_size[1] + 5),
+                0,
+                tl / 3,
+                [220, 220, 220],
+                thickness=tf,
+                lineType=cv2.LINE_AA,
+            )
+
+        # Image border
+        cv2.rectangle(
+            mosaic,
+            (block_x, block_y),
+            (block_x + w, block_y + h),
+            (255, 255, 255),
+            thickness=3,
+        )
+
+    if fname:
+        r = min(1280.0 / max(h, w) / ns, 1.0)  # ratio to limit image size
+        mosaic = cv2.resize(
+            mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA
+        )
+        # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB))  # cv2 save
+        with Image.fromarray(mosaic) as im:
+            im.save(fname)
+    return mosaic
+
+# def plot_images_and_masks(
+#     images,
+#     targets,
+#     masks,
+#     paths=None,
+#     fname="images.jpg",
+#     names=None,
+#     max_size=640,
+#     max_subplots=16,
+# ):
+#     # plot masks first in torch way,
+#     # this is faster if masks are in cuda.
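The overlap-mask branch above stores all instances of an image in a single integer map (0 = background, instance k encoded as the value k+1) and expands it on demand; a self-contained sketch of that decoding:

```python
import numpy as np

overlap = np.array([[0, 1, 1],
                    [2, 2, 1],
                    [0, 2, 3]])                      # toy 3x3 map with 3 instances
n = int(overlap.max())
index = np.arange(n).reshape(n, 1, 1) + 1            # values 1..n, shape (n, 1, 1)
binary = np.where(overlap[None] == index, 1.0, 0.0)  # (n, h, w) via broadcasting
assert binary[1].sum() == (overlap == 2).sum()       # binary[1] is instance 2's mask
```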
+# masks = torch.as_tensor(masks, dtype=torch.float32) +# images = torch.as_tensor(images, dtype=torch.float32, device=masks.device) +# if isinstance(targets, torch.Tensor): +# targets = targets.cpu().numpy() +# +# # normalize +# if images[0].max() > 1: +# images /= 255 +# +# images_with_masks = [] +# for i, img in enumerate(images): +# if len(targets) == 0: +# continue +# idx = (targets[:, 0]).astype(int) +# image_targets = targets[idx == i] +# mcolors = np.array([colors(int(cls), bgr=True) for cls in image_targets[:, 1]]) +# labels = image_targets.shape[1] == 6 # labels if no conf column +# conf = ( +# None if labels else image_targets[:, 6] +# ) # check for confidence presence (label vs pred) +# +# if masks.max() > 1.0: # mean that masks are overlap +# image_masks = masks[[i]] # (1, 640, 640) +# # convert masks (1, 640, 640) -> (n, 640, 640) +# nl = len(image_targets) +# index = torch.arange(nl, device=image_masks.device).view(nl, 1, 1) + 1 +# image_masks = image_masks.repeat(nl, 1, 1) +# image_masks = torch.where(image_masks == index, 1.0, 0.0) +# else: +# image_masks = masks[idx == i] +# if conf is not None: +# image_masks = image_masks[conf > 0.25] +# mcolors = mcolors[conf > 0.25] +# image_with_masks = plot_masks(img, image_masks, mcolors) +# images_with_masks.append(image_with_masks[..., ::-1]) +# images = np.stack(images_with_masks, axis=0) +# +# bs, h, w, _,= images.shape # batch size, _, height, width +# bs = min(bs, max_subplots) # limit plot images +# ns = np.ceil(bs ** 0.5) # number of subplots (square) +# +# mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init +# for i, im in enumerate(images): +# if i == max_subplots: # if last batch has fewer images than we expect +# break +# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin +# mosaic[y : y + h, x : x + w, :] = im +# +# # Resize (optional) +# scale = max_size / ns / max(h, w) +# if scale < 1: +# h = math.ceil(scale * h) +# w = math.ceil(scale * w) +# mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) +# +# # Annotate +# fs = int((h + w) * ns * 0.01) # font size +# annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) +# for i in range(i + 1): +# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin +# annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders +# if paths: +# annotator.text( +# (x + 5, y + 5 + h), +# text=Path(paths[i]).name[:40], +# txt_color=(220, 220, 220), +# ) # filenames +# if len(targets) > 0: +# ti = targets[targets[:, 0] == i] # image targets +# boxes = xywh2xyxy(ti[:, 2:6]).T +# classes = ti[:, 1].astype("int") +# labels = ti.shape[1] == 6 # labels if no conf column +# conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) +# +# if boxes.shape[1]: +# if boxes.max() <= 1.01: # if normalized with tolerance 0.01 +# boxes[[0, 2]] *= w # scale to pixels +# boxes[[1, 3]] *= h +# elif scale < 1: # absolute coords need scale if image scales +# boxes *= scale +# boxes[[0, 2]] += x +# boxes[[1, 3]] += y +# for j, box in enumerate(boxes.T.tolist()): +# cls = classes[j] +# color = colors(cls) +# cls = names[cls] if names else cls +# if labels or conf[j] > 0.25: # 0.25 conf thresh +# label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" +# annotator.box_label(box, label, color=color) +# annotator.im.save(fname) # save +# return annotator.result() + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. 
Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for _, f in enumerate(files): + try: + data = pd.read_csv(f) + index = np.argmax( + 0.9 * data.values[:, 8] + + 0.1 * data.values[:, 7] + + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11], + ) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter( + index, + y[index], + color="r", + label=f"best:{index}", + marker="*", + linewidth=3, + ) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() + From 9d5bbf77732ed19b3b3dbf4349a8f1fe9813a5df Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 19:31:15 +0800 Subject: [PATCH 053/247] revert yolo.py&&remove evaluator.py --- models/yolo.py | 18 +- segment/evaluator.py | 616 ------------------------------------------- segment/val.py | 2 +- 3 files changed, 2 insertions(+), 634 deletions(-) delete mode 100644 segment/evaluator.py diff --git a/models/yolo.py b/models/yolo.py index cd9248e7c8c2..f991cdc7ec66 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -14,8 +14,6 @@ from copy import deepcopy from pathlib import Path -from torch import NoneType - FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: @@ -30,7 +28,6 @@ from utils.plots import feature_visualization from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync) -import torch.nn.functional as F try: import thop # for FLOPs computation @@ -110,8 +107,7 @@ def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inp # nn.SiLU(inplace=True), # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), # nn.SiLU(inplace=True), - # nn.Upsample(scale_factor=2, mode='nearest'), - Upsample(scale_factor=2, mode='bilinear', align_corners=False), + nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), nn.SiLU(inplace=True), nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), @@ -382,18 +378,6 @@ def parse_model(d, ch): # model_dict, input_channels(3) ch.append(c2) return nn.Sequential(*layers), sorted(save) -class Upsample(nn.Module): - ''' - deterministic upsample layer - ''' - def __init__(self, scale_factor, mode="bilinear", align_corners=False) -> None: - super().__init__() - self.scale_factor = scale_factor - self.mode = mode - self.align_corners = align_corners - - def forward(self, x): - return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode, 
align_corners=self.align_corners) if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/segment/evaluator.py b/segment/evaluator.py deleted file mode 100644 index acf8f94e42cf..000000000000 --- a/segment/evaluator.py +++ /dev/null @@ -1,616 +0,0 @@ -# TODO: Optimize plotting, losses & merge with val.py - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Validate a trained YOLOv5 model accuracy on a custom dataset - -Usage: - $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 -""" - -import json -from pathlib import Path -from threading import Thread - -import numpy as np -import torch -import torch.nn.functional as F -import pycocotools.mask as mask_util -from tqdm import tqdm - -from models.experimental import attempt_load -from utils.segment.dataloaders import create_dataloader -from utils.general import (box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, ) -from utils.general import (check_dataset, check_img_size, check_suffix) -from utils.general import (coco80_to_coco91_class, increment_path, colorstr, ) -from utils.plots import output_to_target, plot_images_boxes_and_masks -from utils.segment.metrics import ap_per_class, ap_per_class_box_and_mask, ConfusionMatrix -from utils.segment.general import (non_max_suppression_masks, mask_iou, process_mask, process_mask_upsample, scale_masks, ) -from utils.torch_utils import select_device, time_sync, de_parallel - - -def save_one_txt(predn, save_conf, shape, file): - # Save one txt result - gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh - for *xyxy, conf, cls in predn.tolist(): - xywh = ((xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()) # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(file, "a") as f: - f.write(("%g " * len(line)).rstrip() % line + "\n") - - -def save_one_json(predn, jdict, path, class_map, pred_masks=None): - # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = xyxy2xywh(predn[:, :4]) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - - if pred_masks is not None: - pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") - - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = {"image_id": image_id, "category_id": class_map[int(p[5])], "bbox": [round(x, 3) for x in b], - "score": round(p[4], 5), } - if pred_masks is not None: - pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) - - -@torch.no_grad() -class Yolov5Evaluator: - def __init__(self, data, conf_thres=0.001, iou_thres=0.6, device="", single_cls=False, augment=False, verbose=False, - project="runs/val", name="exp", exist_ok=False, half=True, save_dir=Path(""), nosave=False, plots=True, - max_plot_dets=10, mask=False, mask_downsample_ratio=1, overlap=False) -> None: - self.data = check_dataset(data) # check - self.conf_thres = conf_thres # confidence threshold - self.iou_thres = iou_thres # NMS IoU threshold - self.device = device # cuda device, i.e. 
0 or 0,1,2,3 or cpu
- self.single_cls = single_cls # treat as single-class dataset
- self.augment = augment # augmented inference
- self.verbose = verbose # verbose output
- self.project = project # save to project/name
- self.name = name # save to project/name
- self.exist_ok = exist_ok # existing project/name ok, do not increment
- self.half = half # use FP16 half-precision inference
- self.save_dir = save_dir
- self.nosave = nosave
- self.plots = plots
- self.max_plot_dets = max_plot_dets
- self.mask = mask
- self.mask_downsample_ratio = mask_downsample_ratio
- self.overlap = overlap
-
- self.nc = 1 if self.single_cls else int(self.data["nc"]) # number of classes
- self.iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95
- self.niou = self.iouv.numel()
- self.confusion_matrix = ConfusionMatrix(nc=self.nc)
- self.dt = [0.0, 0.0, 0.0]
- self.names = {k: v for k, v in enumerate(self.data["names"])}
- self.s = (("%20s" + "%11s" * 10) % (
- "Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", "mAP@.5",
- "mAP@.5:.95}",) if self.mask else ("%20s" + "%11s" * 6) % (
- "Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95",))
- self.step = 0
-
- # coco stuff
- self.is_coco = isinstance(self.data.get("val"), str) and self.data["val"].endswith(
- "coco/val2017.txt") # COCO dataset
- self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000))
- self.jdict = []
- self.iou_thres = 0.65 if self.is_coco else self.iou_thres
-
- # masks stuff
- self.pred_masks = [] # for mask visualization
-
- # metric stuff
- self.seen = 0
- self.stats = []
- self.total_loss = torch.zeros((4 if self.mask else 3))
- self.metric = Metrics() if self.mask else Metric()
-
- @torch.no_grad()
- def run_training(self, model, dataloader, compute_loss=None):
- """This is for evaluation when training."""
- self.seen = 0
- self.device = next(model.parameters()).device # get model device
- # self.iouv.to(self.device)
- self.total_loss = torch.zeros((4 if self.mask else 3), device=self.device)
- self.half &= self.device.type != "cpu" # half precision only supported on CUDA
- model.half() if self.half else model.float()
- # Configure
- model.eval()
-
- # inference
- # masks will be `None` when training detection only.
- for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
- # reset pred_masks
- self.pred_masks = []
- img = img.to(self.device, non_blocking=True)
- targets = targets.to(self.device)
- if masks is not None:
- masks = masks.to(self.device).float()
- out, train_out = self.inference(model, img, targets, masks, compute_loss)
-
- # Statistics per image
- for si, pred in enumerate(out):
- self.seen += 1
-
- # evaluate at the per-image level
- labels = targets[targets[:, 0] == si, 1:]
- midx = [si] if self.overlap else targets[:, 0] == si
- gt_masksi = masks[midx] if masks is not None else None
-
- # get prediction masks
- proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
- pred_maski = self.get_predmasks(pred, proto_out,
- gt_masksi.shape[1:] if gt_masksi is not None else None, )
-
- # for visualization
- if self.plots and batch_i < 3 and pred_maski is not None:
- self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
-
- # NOTE: eval in training image-size space
- self.compute_stat(pred, pred_maski, labels, gt_masksi)
-
- if self.plots and batch_i < 2:
- self.plot_images(batch_i, img, targets, masks, out, paths)
-
- # compute map and print it.
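# A minimal, self-contained sketch of the "compute map" step above: `after_infer()`
# delegates to `ap_per_class`, which ranks predictions by confidence, accumulates
# TP/FP counts against the (N, 10) `correct` matrix, and integrates precision over
# recall. Names, shapes and the 101-point sampling are illustrative assumptions,
# not the repo implementation.
import numpy as np

def ap_sketch(correct, conf, n_gt):
    # correct: (N, 10) bool, one column per IoU threshold 0.5:0.95; conf: (N,)
    order = np.argsort(-conf)              # rank predictions by confidence
    tp = correct[order].cumsum(0)          # cumulative true positives
    fp = (~correct[order]).cumsum(0)       # cumulative false positives
    recall = tp / (n_gt + 1e-16)           # (N, 10)
    precision = tp / (tp + fp + 1e-16)     # (N, 10)
    x = np.linspace(0, 1, 101)             # COCO-style 101-point interpolation
    return np.array([np.trapz(np.interp(x, recall[:, i], precision[:, i]), x)
                     for i in range(correct.shape[1])])  # ap[0]=AP@.5, ap.mean()=AP@.5:.95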
- t = self.after_infer()
-
- # Return results
- model.float() # for training
- self.step += 1
- return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),),
- self.metric.get_maps(self.nc), t,)
-
- def run(self, weights, batch_size, imgsz, save_txt=False, save_conf=False, save_json=False, task="val", ):
- """This is for native evaluation."""
- model, dataloader, imgsz = self.before_infer(weights, batch_size, imgsz, save_txt, task)
- self.seen = 0
- # self.iouv.to(self.device)
- self.half &= self.device.type != "cpu" # half precision only supported on CUDA
- model.half() if self.half else model.float()
- # Configure
- model.eval()
-
- # inference
- for batch_i, (img, targets, paths, shapes, masks) in enumerate(tqdm(dataloader, desc=self.s)):
- # reset pred_masks
- self.pred_masks = []
- img = img.to(self.device, non_blocking=True)
- targets = targets.to(self.device)
- if masks is not None:
- masks = masks.to(self.device).float()
- out, train_out = self.inference(model, img, targets, masks)
-
- # Statistics per image
- for si, pred in enumerate(out):
- self.seen += 1
- path = Path(paths[si])
- shape = shapes[si][0]
- ratio_pad = shapes[si][1]
-
- # evaluate at the per-image level
- labels = targets[targets[:, 0] == si, 1:]
- midx = [si] if self.overlap else targets[:, 0] == si
- gt_masksi = masks[midx] if masks is not None else None
-
- # get prediction masks
- proto_out = train_out[1][si] if isinstance(train_out, tuple) else None
- pred_maski = self.get_predmasks(pred, proto_out,
- gt_masksi.shape[1:] if gt_masksi is not None else None, )
-
- # for visualization
- if self.plots and batch_i < 3 and pred_maski is not None:
- self.pred_masks.append(pred_maski[:self.max_plot_dets].cpu())
-
- # NOTE: eval in training image-size space
- self.compute_stat(pred, pred_maski, labels, gt_masksi)
-
- # no predictions, nothing to save
- if len(pred) == 0:
- continue
-
- if save_txt or save_json:
- # clone() so plot_images still works on the original pred
- predn = pred.clone()
- # test-time uses 0.5 padding, which differs from the dataloader padding, so ratio_pad must be passed in
- scale_coords(img[si].shape[1:], predn[:, :4], shape, ratio_pad) # native-space pred
-
- # Save/log
- if save_txt and self.save_dir.exists():
- # NOTE: convert coords to native space when saving txt.
- # only box predictions are saved
- save_one_txt(predn, save_conf, shape, file=self.save_dir / "labels" / (path.stem + ".txt"), )
- if save_json and self.save_dir.exists():
- # NOTE: convert coords to native space when saving json.
- # if pred_maski is not None:
- # h, w, n
- pred_maski = scale_masks(img[si].shape[1:], pred_maski.permute(1, 2, 0).contiguous().cpu().numpy(),
- shape, ratio_pad, )
- save_one_json(predn, self.jdict, path, self.class_map,
- pred_maski, ) # append to COCO-JSON dictionary
-
- if self.plots and batch_i < 3:
- self.plot_images(batch_i, img, targets, masks, out, paths)
-
- # compute map and print it.
- t = self.after_infer()
-
- # save json
- if self.save_dir.exists() and save_json:
- pred_json = str(self.save_dir / f"predictions.json") # predictions json
- print(f"\nEvaluating pycocotools mAP...
saving {pred_json}...") - with open(pred_json, "w") as f: - json.dump(self.jdict, f) - - # Print speeds - shape = (batch_size, 3, imgsz, imgsz) - print(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t) - - s = ( - f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if save_txt and self.save_dir.exists() else "") - print(f"Results saved to {colorstr('bold', self.save_dir if self.save_dir.exists() else None)}{s}") - - # Return results - return ((*self.metric.mean_results(), *(self.total_loss.cpu() / len(dataloader)).tolist(),), - self.metric.get_maps(self.nc), t,) - - def before_infer(self, weights, batch_size, imgsz, save_txt, task="val"): - "prepare for evaluation without training." - self.device = select_device(self.device, batch_size=batch_size) - - # Directories - self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run - if not self.nosave: - (self.save_dir / "labels" if save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir - - # Load model - check_suffix(weights, ".pt") - model = attempt_load(weights, device=self.device) # load FP32 model - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(imgsz, s=gs) # check image size - - # Data - if self.device.type != "cpu": - model(torch.zeros(1, 3, imgsz, imgsz).to(self.device).type_as(next(model.parameters()))) # run once - pad = 0.0 if task == "speed" else 0.5 - task = (task if task in ("train", "val", "test") else "val") # path to train/val/test images - dataloader = create_dataloader(self.data[task], imgsz, batch_size, gs, self.single_cls, pad=pad, rect=True, - prefix=colorstr(f"{task}: "), mask_head=self.mask, mask_downsample_ratio=self.mask_downsample_ratio, )[0] - return model, dataloader, imgsz - - def inference(self, model, img, targets, masks=None, compute_loss=None): - """Inference""" - t1 = time_sync() - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - _, _, height, width = img.shape # batch size, channels, height, width - t2 = time_sync() - self.dt[0] += t2 - t1 - - # Run model - out, train_out = model(img, augment=self.augment) # inference and training outputs - self.dt[1] += time_sync() - t2 - - # Compute loss - if compute_loss: - self.total_loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls - - # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(self.device) # to pixels - t3 = time_sync() - out = self.nms(prediction=out, conf_thres=self.conf_thres, iou_thres=self.iou_thres, multi_label=True, - agnostic=self.single_cls, mask_dim=de_parallel(model).model[-1].mask_dim) - self.dt[2] += time_sync() - t3 - return out, train_out - - def after_infer(self): - """Do something after inference, such as plots and get metrics. - Return: - t(tuple): speeds of per image. 
- """ - # Plot confusion matrix - if self.plots and self.save_dir.exists(): - self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values())) - - # Compute statistics - stats = [np.concatenate(x, 0) for x in zip(*self.stats)] # to numpy - box_or_mask_any = stats[0].any() or stats[1].any() - stats = stats[1:] if not self.mask else stats - if len(stats) and box_or_mask_any: - results = self.ap_per_class(*stats, self.plots, self.save_dir if self.save_dir.exists() else None, - self.names, ) - self.metric.update(results) - nt = np.bincount(stats[(3 if not self.mask else 4)].astype(np.int64), - minlength=self.nc) # number of targets per class - else: - nt = torch.zeros(1) - - # make this empty, cause make `stats` self is for reduce some duplicated codes. - self.stats = [] - # print information - self.print_metric(nt, stats) - t = tuple(x / self.seen * 1e3 for x in self.dt) # speeds per image - return t - - def process_batch(self, detections, labels, iouv): - """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. - Arguments: - detections (Array[N, 6]), x1, y1, x2, y2, conf, class - labels (Array[M, 5]), class, x1, y1, x2, y2 - Returns: - correct (Array[N, 10]), for 10 IoU levels - """ - correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) - iou = box_iou(labels[:, 1:], detections[:, :4]) - x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match - if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] - if x[0].shape[0] > 1: - matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] - # matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] - matches = torch.Tensor(matches).to(iouv.device) - correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv - return correct - - def get_predmasks(self, pred, proto_out, gt_shape): - """Get pred masks in different ways. - 1. process_mask, for val when training, eval with low quality(1/mask_ratio of original size) - mask for saving cuda memory. - 2. process_mask_upsample, for val after training to get high quality mask(original size). - - Args: - pred(torch.Tensor): output of network, (N, 5 + mask_dim + class). - proto_out(torch.Tensor): output of mask prototype, (mask_dim, mask_h, mask_w). - gt_shape(tuple): shape of gt mask, this shape may not equal to input size of - input image, Cause the mask_downsample_ratio. - Return: - pred_mask(torch.Tensor): predition of final masks with the same size with - input image, (N, input_h, input_w). - """ - if proto_out is None or len(pred) == 0: - return None - process = process_mask_upsample if self.plots else process_mask - gt_shape = (gt_shape[0] * self.mask_downsample_ratio, gt_shape[1] * self.mask_downsample_ratio,) - # n, h, w - pred_mask = (process(proto_out, pred[:, 6:], pred[:, :4], shape=gt_shape).permute(2, 0, 1).contiguous()) - return pred_mask - - def process_batch_masks(self, predn, pred_maski, gt_masksi, labels): - assert not ((pred_maski is None) ^ ( - gt_masksi is None)), "`proto_out` and `gt_masksi` should be both None or both exist." 
- if pred_maski is None and gt_masksi is None:
- return torch.zeros(0, self.niou, dtype=torch.bool)
-
- correct = torch.zeros(predn.shape[0], self.iouv.shape[0], dtype=torch.bool, device=self.iouv.device, )
-
- # convert masks (1, 640, 640) -> (n, 640, 640)
- if self.overlap:
- nl = len(labels)
- index = torch.arange(nl, device=gt_masksi.device).view(nl, 1, 1) + 1
- gt_masksi = gt_masksi.repeat(nl, 1, 1)
- gt_masksi = torch.where(gt_masksi == index, 1.0, 0.0)
-
- if gt_masksi.shape[1:] != pred_maski.shape[1:]:
- gt_masksi = F.interpolate(gt_masksi.unsqueeze(0), pred_maski.shape[1:], mode="bilinear",
- align_corners=False, ).squeeze(0)
-
- iou = mask_iou(gt_masksi.view(gt_masksi.shape[0], -1), pred_maski.view(pred_maski.shape[0], -1), )
- x = torch.where(
- (iou >= self.iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match
- if x[0].shape[0]:
- matches = (
- torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou]
- if x[0].shape[0] > 1:
- matches = matches[matches[:, 2].argsort()[::-1]]
- matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
- # matches = matches[matches[:, 2].argsort()[::-1]]
- matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
- matches = torch.Tensor(matches).to(self.iouv.device)
- correct[matches[:, 1].long()] = matches[:, 2:3] >= self.iouv
- return correct
-
- def compute_stat(self, predn, pred_maski, labels, gt_maski):
- """Compute IoU statistics, with boxes in training image-size space."""
- nl = len(labels)
- tcls = labels[:, 0].tolist() if nl else [] # target class
-
- if len(predn) == 0:
- if nl:
- self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), # masks
- torch.zeros(0, self.niou, dtype=torch.bool), # boxes
- torch.Tensor(), torch.Tensor(), tcls,))
- return
-
- # Predictions
- if self.single_cls:
- predn[:, 5] = 0
-
- # Evaluate
- if nl:
- tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
- labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
- # boxes
- correct_boxes = self.process_batch(predn, labelsn, self.iouv)
-
- # masks
- correct_masks = self.process_batch_masks(predn, pred_maski, gt_maski, labelsn)
-
- if self.plots:
- self.confusion_matrix.process_batch(predn, labelsn)
- else:
- correct_boxes = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
- correct_masks = torch.zeros(predn.shape[0], self.niou, dtype=torch.bool)
- self.stats.append((correct_masks.cpu(), correct_boxes.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(),
- tcls,)) # (correct_masks, correct_boxes, conf, pcls, tcls)
-
- def print_metric(self, nt, stats):
- # Print results
- pf = "%20s" + "%11i" * 2 + "%11.3g" * (8 if self.mask else 4)
- print(pf % ("all", self.seen, nt.sum(), *self.metric.mean_results()))
-
- # Print results per class
- # TODO: self.seen support verbose.
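# A minimal sketch (assumed shapes; not the repo helper) of the `mask_iou` call used
# by `process_batch_masks` above: both mask sets are flattened to (n, H*W) vectors so
# a single matrix product yields all pairwise intersections at once.
import torch

def mask_iou_sketch(m1, m2, eps=1e-7):
    # m1: (N, H*W), m2: (M, H*W), binary {0., 1.} float masks
    inter = m1 @ m2.T                                     # (N, M) pairwise overlap
    union = m1.sum(1)[:, None] + m2.sum(1)[None] - inter  # inclusion-exclusion
    return inter / (union + eps)                          # (N, M) IoU matrix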
- if self.verbose and self.nc > 1 and len(stats): - for i, c in enumerate(self.metric.ap_class_index): - print(pf % (self.names[c], self.seen, nt[c], *self.metric.class_result(i))) - - def plot_images(self, i, img, targets, masks, out, paths): - if not self.save_dir.exists(): - return - # plot ground truth - f = self.save_dir / f"val_batch{i}_labels.jpg" # labels - - if masks is not None and masks.shape[1:] != img.shape[2:]: - masks = F.interpolate( - masks.unsqueeze(0).float(), - img.shape[2:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - - Thread(target=plot_images_boxes_and_masks, args=(img, targets, masks, paths, f, self.names, max(img.shape[2:])), - daemon=True, ).start() - f = self.save_dir / f"val_batch{i}_pred.jpg" # predictions - - # plot predition - if len(self.pred_masks): - pred_masks = (torch.cat(self.pred_masks, dim=0) if len(self.pred_masks) > 1 else self.pred_masks[0]) - else: - pred_masks = None - plot_images_boxes_and_masks(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:])) - #Thread(target=plot_images_boxes_and_masks, - # args=(img, output_to_target(out, filter_dets=self.max_plot_dets), pred_masks, paths, f, self.names, max(img.shape[2:]),), - # daemon=True, ).start() - # import wandb - # if wandb.run: - # wandb.log({f"pred_{i}": wandb.Image(str(f))}, step=self.step) - - def nms(self, **kwargs): - return (non_max_suppression_masks(**kwargs) if self.mask else non_max_suppression(**kwargs)) - - def ap_per_class(self, *args): - return ap_per_class_box_and_mask(*args) if self.mask else ap_per_class(*args) - - -class Metric: - def __init__(self) -> None: - self.p = [] # (nc, ) - self.r = [] # (nc, ) - self.f1 = [] # (nc, ) - self.all_ap = [] # (nc, 10) - self.ap_class_index = [] # (nc, ) - - @property - def ap50(self): - """AP@0.5 of all classes. - Return: - (nc, ) or []. - """ - return self.all_ap[:, 0] if len(self.all_ap) else [] - - @property - def ap(self): - """AP@0.5:0.95 - Return: - (nc, ) or []. - """ - return self.all_ap.mean(1) if len(self.all_ap) else [] - - @property - def mp(self): - """mean precision of all classes. - Return: - float. - """ - return self.p.mean() if len(self.p) else 0.0 - - @property - def mr(self): - """mean recall of all classes. - Return: - float. - """ - return self.r.mean() if len(self.r) else 0.0 - - @property - def map50(self): - """Mean AP@0.5 of all classes. - Return: - float. - """ - return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 - - @property - def map(self): - """Mean AP@0.5:0.95 of all classes. - Return: - float. 
- """ - return self.all_ap.mean() if len(self.all_ap) else 0.0 - - def mean_results(self): - """Mean of results, return mp, mr, map50, map""" - return (self.mp, self.mr, self.map50, self.map) - - def class_result(self, i): - """class-aware result, return p[i], r[i], ap50[i], ap[i]""" - return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) - - def get_maps(self, nc): - maps = np.zeros(nc) + self.map - for i, c in enumerate(self.ap_class_index): - maps[c] = self.ap[i] - return maps - - def update(self, results): - """ - Args: - results: tuple(p, r, ap, f1, ap_class) - """ - p, r, all_ap, f1, ap_class_index = results - self.p = p - self.r = r - self.all_ap = all_ap - self.f1 = f1 - self.ap_class_index = ap_class_index - - -class Metrics: - """Metric for boxes and masks.""" - - def __init__(self) -> None: - self.metric_box = Metric() - self.metric_mask = Metric() - - def update(self, results): - """ - Args: - results: Dict{'boxes': Dict{}, 'masks': Dict{}} - """ - self.metric_box.update(list(results["boxes"].values())) - self.metric_mask.update(list(results["masks"].values())) - - def mean_results(self): - return self.metric_box.mean_results() + self.metric_mask.mean_results() - - def class_result(self, i): - return self.metric_box.class_result(i) + self.metric_mask.class_result(i) - - def get_maps(self, nc): - return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) - - @property - def ap_class_index(self): - # boxes and masks have the same ap_class_index - return self.metric_box.ap_class_index diff --git a/segment/val.py b/segment/val.py index a2a4eb526773..877296e2b6dc 100644 --- a/segment/val.py +++ b/segment/val.py @@ -317,7 +317,7 @@ def run( pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) + callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) # Plot images if plots and batch_i < 3: From 71776e1cbc01d2933211a67a794b29c615879557 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 20:23:04 +0800 Subject: [PATCH 054/247] fix mixup --- segment/val.py | 21 ++++++++++++--------- utils/segment/augmentations.py | 7 +++++++ utils/segment/dataloaders.py | 8 ++++---- utils/segment/general.py | 2 -- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/segment/val.py b/segment/val.py index 877296e2b6dc..7387e2a5c30b 100644 --- a/segment/val.py +++ b/segment/val.py @@ -274,14 +274,6 @@ def run( # Metrics for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] - midx = [si] if overlap else targets[:, 0] == si - gt_masks = masks[midx] - proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() - if plots and batch_i < 3: - plot_masks.append(pred_masks[:15].cpu()) - nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init @@ -293,6 +285,16 @@ def run( stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) continue + # deal with masks + midx = [si] if overlap else targets[:, 0] == si + gt_masks = masks[midx] + proto_out = train_out[1][si] + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + 
shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() + if plots and batch_i < 3: + # filter top 15 to plot + plot_masks.append(pred_masks[:15].cpu()) + # Predictions if single_cls: pred[:, 5] = 0 @@ -379,7 +381,8 @@ def run( # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights - anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json + # anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json + anno_json = "/d/dataset/COCO/annotations/instances_val2017.json" pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index be788a81ea94..c532119c5058 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -12,6 +12,13 @@ from ..general import segment2box, resample_segments from ..augmentations import box_candidates +def mixup(im, labels, segments, im2, labels2, segments2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + segments = np.concatenate((segments, segments2), 0) + return im, labels, segments def random_perspective(im, targets=(), diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index 0230bcee13d2..89ac50dba401 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -12,11 +12,11 @@ from torch.utils.data import DataLoader from torch.utils.data import distributed -from ..augmentations import augment_hsv, copy_paste, letterbox, mixup +from ..augmentations import augment_hsv, copy_paste, letterbox from ..dataloaders import LoadImagesAndLabels, InfiniteDataLoader, seed_worker from ..general import xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER from ..torch_utils import torch_distributed_zero_first -from .augmentations import random_perspective +from .augmentations import random_perspective, mixup def create_dataloader(path, @@ -96,10 +96,10 @@ def __getitem__(self, index): img, labels, segments = self.load_mosaic(index) shapes = None - # TODO: Mixup not support segment for now # MixUp augmentation if random.random() < hyp["mixup"]: - img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.num_imgs - 1))) + img, labels, segments = mixup(img, labels, segments, + *self.load_mosaic(random.randint(0, self.n - 1))) else: # Load image diff --git a/utils/segment/general.py b/utils/segment/general.py index d24b263bcc59..00367e7268fd 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -149,9 +149,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ # mask_h, mask_w, n masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T - # print(masks.shape) masks = masks.sigmoid() - # print('after sigmoid:', masks) masks = masks.permute(2, 0, 1).contiguous() # [n, mask_h, mask_w] masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0) From a24b3fd1698078f47a91e2570701631ebf8bc24e Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Thu, 11 Aug 2022 20:40:03 +0800 Subject: [PATCH 055/247] update val(temp way) --- segment/val.py | 23 
+++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/segment/val.py b/segment/val.py index 7387e2a5c30b..63a6f479b89f 100644 --- a/segment/val.py +++ b/segment/val.py @@ -37,6 +37,7 @@ import torch.nn.functional as F import pycocotools.mask as mask_util from models.common import DetectMultiBackend +from models.experimental import attempt_load # scoped to avoid circular import from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, @@ -183,14 +184,20 @@ def run( (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) - stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + # model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + model = attempt_load(weights, device=device) # load FP32 model + stride = 32 + pt, jit, engine = True, False, False + # stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size - half = model.fp16 # FP16 supported on limited backends with CUDA + # half = model.fp16 # FP16 supported on limited backends with CUDA + half = device.type != 'cpu' + if half: + model.half() if engine: batch_size = model.batch_size else: - device = model.device + # device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') @@ -209,10 +216,10 @@ def run( # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data - ncm = model.model.nc + ncm = model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
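# A sketch of the COCO RLE serialization that `save_one_json` applies to the scaled
# masks further below; the mask here is a dummy value. pycocotools expects a
# Fortran-ordered uint8 (H, W, 1) array, and the RLE byte counts must be decoded
# to str before json.dump.
import numpy as np
import pycocotools.mask as mask_util

m = (np.random.rand(160, 160) > 0.5).astype(np.uint8)       # dummy binary mask
rle = mask_util.encode(np.asarray(m[:, :, None], order="F"))[0]
rle["counts"] = rle["counts"].decode("utf-8")               # JSON-serializable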
- model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup + # model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad = 0.0 if task in ('speed', 'benchmark') else 0.5 rect = False if task == 'benchmark' else pt # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images @@ -254,7 +261,7 @@ def run( dt[0] += t2 - t1 # Inference - out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs + out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 # Loss @@ -318,7 +325,7 @@ def run( if save_json: pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) - save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary + save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) # Plot images From 8cf90edc2dd95da59d5c9b205945c2a3669417c6 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:41:26 +0000 Subject: [PATCH 056/247] clean up --- utils/segment/augmentations.py | 33 +++-------- utils/segment/plots.py | 104 --------------------------------- 2 files changed, 8 insertions(+), 129 deletions(-) diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index c532119c5058..dc29df6ad8ad 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -81,37 +81,20 @@ def random_perspective(im, n = len(targets) new_segments = [] if n: - use_segments = any(x.any() for x in segments) new = np.zeros((n, 4)) - if use_segments: # warp segments - segments = resample_segments(segments) # upsample - for i, segment in enumerate(segments): - xy = np.ones((len(segment), 3)) - xy[:, :2] = segment - xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine - - # clip - new[i] = segment2box(xy, width, height) - new_segments.append(xy) - - else: # warp boxes - xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine - - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - new = (np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T) + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine # clip - new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) - new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + new[i] = segment2box(xy, width, height) + new_segments.append(xy) # filter candidates - i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) targets = targets[i] targets[:, 1:5] = new[i] new_segments = np.array(new_segments)[i] diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 77fb983fe8d1..eb1e9b61d01a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -201,110 +201,6 @@ def plot_images_and_masks( 
im.save(fname) return mosaic -# def plot_images_and_masks( -# images, -# targets, -# masks, -# paths=None, -# fname="images.jpg", -# names=None, -# max_size=640, -# max_subplots=16, -# ): -# # plot masks first in torch way, -# # this is faster if masks are in cuda. -# masks = torch.as_tensor(masks, dtype=torch.float32) -# images = torch.as_tensor(images, dtype=torch.float32, device=masks.device) -# if isinstance(targets, torch.Tensor): -# targets = targets.cpu().numpy() -# -# # normalize -# if images[0].max() > 1: -# images /= 255 -# -# images_with_masks = [] -# for i, img in enumerate(images): -# if len(targets) == 0: -# continue -# idx = (targets[:, 0]).astype(int) -# image_targets = targets[idx == i] -# mcolors = np.array([colors(int(cls), bgr=True) for cls in image_targets[:, 1]]) -# labels = image_targets.shape[1] == 6 # labels if no conf column -# conf = ( -# None if labels else image_targets[:, 6] -# ) # check for confidence presence (label vs pred) -# -# if masks.max() > 1.0: # mean that masks are overlap -# image_masks = masks[[i]] # (1, 640, 640) -# # convert masks (1, 640, 640) -> (n, 640, 640) -# nl = len(image_targets) -# index = torch.arange(nl, device=image_masks.device).view(nl, 1, 1) + 1 -# image_masks = image_masks.repeat(nl, 1, 1) -# image_masks = torch.where(image_masks == index, 1.0, 0.0) -# else: -# image_masks = masks[idx == i] -# if conf is not None: -# image_masks = image_masks[conf > 0.25] -# mcolors = mcolors[conf > 0.25] -# image_with_masks = plot_masks(img, image_masks, mcolors) -# images_with_masks.append(image_with_masks[..., ::-1]) -# images = np.stack(images_with_masks, axis=0) -# -# bs, h, w, _,= images.shape # batch size, _, height, width -# bs = min(bs, max_subplots) # limit plot images -# ns = np.ceil(bs ** 0.5) # number of subplots (square) -# -# mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init -# for i, im in enumerate(images): -# if i == max_subplots: # if last batch has fewer images than we expect -# break -# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin -# mosaic[y : y + h, x : x + w, :] = im -# -# # Resize (optional) -# scale = max_size / ns / max(h, w) -# if scale < 1: -# h = math.ceil(scale * h) -# w = math.ceil(scale * w) -# mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) -# -# # Annotate -# fs = int((h + w) * ns * 0.01) # font size -# annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) -# for i in range(i + 1): -# x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin -# annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders -# if paths: -# annotator.text( -# (x + 5, y + 5 + h), -# text=Path(paths[i]).name[:40], -# txt_color=(220, 220, 220), -# ) # filenames -# if len(targets) > 0: -# ti = targets[targets[:, 0] == i] # image targets -# boxes = xywh2xyxy(ti[:, 2:6]).T -# classes = ti[:, 1].astype("int") -# labels = ti.shape[1] == 6 # labels if no conf column -# conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) -# -# if boxes.shape[1]: -# if boxes.max() <= 1.01: # if normalized with tolerance 0.01 -# boxes[[0, 2]] *= w # scale to pixels -# boxes[[1, 3]] *= h -# elif scale < 1: # absolute coords need scale if image scales -# boxes *= scale -# boxes[[0, 2]] += x -# boxes[[1, 3]] += y -# for j, box in enumerate(boxes.T.tolist()): -# cls = classes[j] -# color = colors(cls) -# cls = names[cls] if names else cls -# if labels or conf[j] > 0.25: # 0.25 conf thresh -# label = f"{cls}" if labels else 
f"{cls} {conf[j]:.1f}" -# annotator.box_label(box, label, color=color) -# annotator.im.save(fname) # save -# return annotator.result() - def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') From 8ecbde80334a0799ca094c3bc3b9f28a31cb1781 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:41:54 +0000 Subject: [PATCH 057/247] cancel generator --- utils/dataloaders.py | 7 ++++--- utils/segment/dataloaders.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 260fb6a97da9..2a06762c9c86 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -135,8 +135,8 @@ def create_dataloader(path, nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates - generator = torch.Generator() - generator.manual_seed(0) + # generator = torch.Generator() + # generator.manual_seed(0) return loader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, @@ -145,7 +145,8 @@ def create_dataloader(path, pin_memory=True, collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, worker_init_fn=seed_worker, - generator=generator), dataset + # generator=generator, + ), dataset class InfiniteDataLoader(dataloader.DataLoader): diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index 89ac50dba401..f4af39617dea 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -62,8 +62,8 @@ def create_dataloader(path, nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates - generator = torch.Generator() - generator.manual_seed(0) + # generator = torch.Generator() + # generator.manual_seed(0) return loader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, @@ -72,7 +72,8 @@ def create_dataloader(path, pin_memory=True, collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, worker_init_fn=seed_worker, - generator=generator), dataset + # generator=generator, + ), dataset class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing From e714bc112305079f3ba924a9d480f50146ea96bc Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:42:13 +0000 Subject: [PATCH 058/247] revert loss --- utils/segment/loss.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 47fed765f990..992fe98499ff 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -92,7 +92,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model if self.sort_obj_iou: sort_id = torch.argsort(score_iou) b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) - tobj[b, a, gj, gi] = 0.5 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio + tobj[b, a, gj, gi] = 1.0 * ((1.0 - self.gr) + self.gr * score_iou) # iou 
ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) @@ -131,12 +131,12 @@ def __call__(self, preds, targets, masks): # predictions, targets, model psi = ps[index][:, 5: self.nm] proto = proto_out[bi] - one_lseg, iou = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + one_lseg = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) batch_lseg += one_lseg - # update tobj - iou = iou.detach().clamp(0).type(tobj.dtype) - tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] + # # update tobj + # iou = iou.detach().clamp(0).type(tobj.dtype) + # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] lseg += batch_lseg / len(b.unique()) @@ -161,11 +161,11 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): # (80, 80, 32) @ (32, n) -> (80, 80, n) pred_mask = proto @ pred.tanh().T # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) - iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) + # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean(), iou# + lseg_iou.mean() + return lseg.mean()#, iou# + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From 83b4020d14cde2a08caf7012d1a1bed3b8c4bf6a Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Sun, 14 Aug 2022 02:42:35 +0000 Subject: [PATCH 059/247] update train.py&&val.py --- segment/train.py | 5 ++--- segment/val.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/segment/train.py b/segment/train.py index 3a06915eb061..ea3ca58316c0 100644 --- a/segment/train.py +++ b/segment/train.py @@ -68,7 +68,6 @@ from datetime import datetime def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary - print(device) save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio @@ -419,7 +418,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, - plots=False, + plots=plots, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, @@ -485,7 +484,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio plots=plots, callbacks=callbacks, compute_loss=compute_loss, - mask_downsample_ratio=1, + mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) diff --git a/segment/val.py b/segment/val.py index 63a6f479b89f..1045a3959793 100644 --- a/segment/val.py +++ b/segment/val.py @@ -300,7 +300,7 @@ def run( shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() if plots and batch_i < 3: # filter top 15 to plot - plot_masks.append(pred_masks[:15].cpu()) + plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) # Predictions if single_cls: @@ -388,8 +388,7 @@ def run( # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights - # anno_json = 
str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json - anno_json = "/d/dataset/COCO/annotations/instances_val2017.json" + anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: From c381d575f57a1c51a42c5c6a57d02f36831d1a1d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Wed, 17 Aug 2022 15:47:43 +0530 Subject: [PATCH 060/247] rearrange model files --- models/{ => segment}/yolov5l_seg.yaml | 0 models/{ => segment}/yolov5m_seg.yaml | 0 models/{ => segment}/yolov5n_seg.yaml | 0 models/{ => segment}/yolov5s_seg.yaml | 0 models/{ => segment}/yolov5x_seg.yaml | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename models/{ => segment}/yolov5l_seg.yaml (100%) rename models/{ => segment}/yolov5m_seg.yaml (100%) rename models/{ => segment}/yolov5n_seg.yaml (100%) rename models/{ => segment}/yolov5s_seg.yaml (100%) rename models/{ => segment}/yolov5x_seg.yaml (100%) diff --git a/models/yolov5l_seg.yaml b/models/segment/yolov5l_seg.yaml similarity index 100% rename from models/yolov5l_seg.yaml rename to models/segment/yolov5l_seg.yaml diff --git a/models/yolov5m_seg.yaml b/models/segment/yolov5m_seg.yaml similarity index 100% rename from models/yolov5m_seg.yaml rename to models/segment/yolov5m_seg.yaml diff --git a/models/yolov5n_seg.yaml b/models/segment/yolov5n_seg.yaml similarity index 100% rename from models/yolov5n_seg.yaml rename to models/segment/yolov5n_seg.yaml diff --git a/models/yolov5s_seg.yaml b/models/segment/yolov5s_seg.yaml similarity index 100% rename from models/yolov5s_seg.yaml rename to models/segment/yolov5s_seg.yaml diff --git a/models/yolov5x_seg.yaml b/models/segment/yolov5x_seg.yaml similarity index 100% rename from models/yolov5x_seg.yaml rename to models/segment/yolov5x_seg.yaml From 0a965bf16dadf723bbd7f0c4e7ad2a40a34fddcc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 19:59:34 +0530 Subject: [PATCH 061/247] create temp trainer --- segment/train_temp.py | 708 ++++++++++++++++++++++++++++++++++++++ utils/loggers/__init__.py | 79 +++++ utils/segment/metrics.py | 34 ++ 3 files changed, 821 insertions(+) create mode 100644 segment/train_temp.py diff --git a/segment/train_temp.py b/segment/train_temp.py new file mode 100644 index 000000000000..57c23811cf53 --- /dev/null +++ b/segment/train_temp.py @@ -0,0 +1,708 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Train a YOLOv5 model on a custom dataset. + +Models and datasets download automatically from the latest YOLOv5 release. 
Models: https://github.com/ultralytics/yolov5/tree/master/models
+Datasets: https://github.com/ultralytics/yolov5/tree/master/data
+Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
+
+Usage:
+ $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED)
+ $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch
+"""
+
+import argparse
+import math
+import os
+import random
+import sys
+import time
+from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
+
+import val # for end-of-epoch mAP
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import yaml
+from torch.nn.parallel import DistributedDataParallel as DDP
+import torch.nn.functional as F
+from torch.optim import SGD, Adam, AdamW, lr_scheduler
+from tqdm import tqdm
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1] # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+ sys.path.append(str(ROOT)) # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
+
+from models.experimental import attempt_load
+from models.yolo import Model
+from utils.autoanchor import check_anchors
+from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
+from utils.segment.dataloaders import create_dataloader
+from utils.downloads import attempt_download
+from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size,
+ check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run,
+ increment_path, init_seeds, intersect_dicts, labels_to_class_weights,
+ labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer)
+from utils.loggers import GenericLogger
+from utils.loggers.wandb.wandb_utils import check_wandb_resume
+from utils.segment.loss import ComputeLoss
+from utils.segment.metrics import fitness
+from utils.plots import plot_evolve, plot_labels
+from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
+
+
+LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
+RANK = int(os.getenv('RANK', -1))
+WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
+from utils.segment.plots import plot_images_and_masks, plot_results_with_masks
+from utils.segment.metrics import KEYS, BEST_KEYS
+
+def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary
+ save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \
+ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
+ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio
+
+ # Directories
+ w = save_dir / 'weights' # weights dir
+ (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
+ last, best = w / 'last.pt', w / 'best.pt'
+
+ # Hyperparameters
+ if isinstance(hyp, str):
+ with open(hyp, errors='ignore') as f:
+ hyp = yaml.safe_load(f) # load hyps dict
+ LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
+
+ # Save run settings
+ if not evolve:
+ with open(save_dir / 'hyp.yaml',
'w') as f: + yaml.safe_dump(hyp, f, sort_keys=False) + with open(save_dir / 'opt.yaml', 'w') as f: + yaml.safe_dump(vars(opt), f, sort_keys=False) + + # Loggers + data_dict = None + if RANK in {-1, 0}: + logger = GenericLogger( + opt=opt, console_logger=LOGGER + ) # loggers instance + + # Register actions + # for k in methods(loggers): + # callbacks.register_action(k, callback=getattr(loggers, k)) + + # Config + plots = not evolve and not opt.noplots # create plots + overlap = opt.overlap_mask + cuda = device.type != 'cpu' + init_seeds(opt.seed + 1 + RANK, True) + with torch_distributed_zero_first(LOCAL_RANK): + data_dict = data_dict or check_dataset(data) # check if None + train_path, val_path = data_dict['train'], data_dict['val'] + nc = 1 if single_cls else int(data_dict['nc']) # number of classes + names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names + assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check + is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset + + # Model + check_suffix(weights, '.pt') # check weights + pretrained = weights.endswith('.pt') + if pretrained: + with torch_distributed_zero_first(LOCAL_RANK): + weights = attempt_download(weights) # download if not found locally + ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak + model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys + csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 + csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect + model.load_state_dict(csd, strict=False) # load + LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report + else: + model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + amp = check_amp(model) # check AMP + + # Freeze + freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze + for k, v in model.named_parameters(): + v.requires_grad = True # train all layers + if any(x in k for x in freeze): + LOGGER.info(f'freezing {k}') + v.requires_grad = False + + # Image size + gs = max(int(model.stride.max()), 32) # grid size (max stride) + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple + + # Batch size + if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size + batch_size = check_train_batch_size(model, imgsz, amp) + logger.update_params({"batch_size": batch_size}) + + # Optimizer + nbs = 64 # nominal batch size + accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing + hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay + LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") + + g = [], [], [] # optimizer parameter groups + bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() + for v in model.modules(): + if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias + g[2].append(v.bias) + if isinstance(v, bn): # weight (no decay) + g[1].append(v.weight) + elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) + g[0].append(v.weight) + + # hyp['lr0'] = hyp['lr0'] / batch_size * 128 + # hyp['warmup_bias_lr'] = 0.01 + if opt.optimizer == 'Adam': + optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + elif opt.optimizer == 'AdamW': + optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + else: + optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + + optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay + optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) + LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " + f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") + del g + + # Scheduler + if opt.cos_lr: + lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] + else: + lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) + + # EMA + ema = ModelEMA(model) if RANK in {-1, 0} else None + + # Resume + start_epoch, best_fitness = 0, 0.0 + if pretrained: + # Optimizer + if ckpt['optimizer'] is not None: + optimizer.load_state_dict(ckpt['optimizer']) + best_fitness = ckpt['best_fitness'] + + # EMA + if ema and ckpt.get('ema'): + ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) + ema.updates = ckpt['updates'] + + # Epochs + start_epoch = ckpt['epoch'] + 1 + if resume: + assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' + if epochs < start_epoch: + LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.") + epochs += ckpt['epoch'] # finetune additional epochs + + del ckpt, csd + + # DP mode + if cuda and RANK == -1 and torch.cuda.device_count() > 1: + LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n' + 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') + model = torch.nn.DataParallel(model) + + # SyncBatchNorm + if opt.sync_bn and cuda and RANK != -1: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) + LOGGER.info('Using SyncBatchNorm()') + + # Trainloader + train_loader, dataset = create_dataloader(train_path, + imgsz, + batch_size // WORLD_SIZE, + gs, + single_cls, + hyp=hyp, + augment=True, + cache=None if opt.cache == 'val' else opt.cache, + rect=opt.rect, + rank=LOCAL_RANK, + workers=workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: '), + shuffle=True, + mask_downsample_ratio=mask_ratio, + overlap_mask=overlap, + ) + mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class + print("mlc , nc ", mlc, " ", nc ) + nb = len(train_loader) # number of batches + assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. 
Possible class labels are 0-{nc - 1}' + + # Process 0 + if RANK in {-1, 0}: + val_loader = create_dataloader(val_path, + imgsz, + batch_size // WORLD_SIZE * 2, + gs, + single_cls, + hyp=hyp, + cache=None if noval else opt.cache, + rect=True, + rank=-1, + workers=workers * 2, + pad=0.5, + mask_downsample_ratio=mask_ratio, + overlap_mask=overlap, + prefix=colorstr('val: '))[0] + + if not resume: + labels = np.concatenate(dataset.labels, 0) + # c = torch.tensor(labels[:, 0]) # classes + # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency + # model._initialize_biases(cf.to(device)) + if plots: + plot_labels(labels, names, save_dir) + + # Anchors + if not opt.noautoanchor: + check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + model.half().float() # pre-reduce anchor precision + + # DDP mode + if cuda and RANK != -1: + if check_version(torch.__version__, '1.11.0'): + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) + else: + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + + # Model attributes + nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) + hyp['box'] *= 3 / nl # scale to layers + hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers + hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers + hyp['label_smoothing'] = opt.label_smoothing + model.nc = nc # attach number of classes to model + model.hyp = hyp # attach hyperparameters to model + model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.names = names + + # Start training + t0 = time.time() + nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + last_opt_step = -1 + maps = np.zeros(nc) # mAP per class + results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + scheduler.last_epoch = start_epoch - 1 # do not move + scaler = torch.cuda.amp.GradScaler(enabled=amp) + stopper, stop = EarlyStopping(patience=opt.patience), False + compute_loss = ComputeLoss(model, overlap=overlap) # init loss class + LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' + f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' + f"Logging results to {colorstr('bold', save_dir)}\n" + f'Starting training for {epochs} epochs...') + for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + model.train() + + # Update image weights (optional, single-GPU only) + if opt.image_weights: + cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights + dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + + # Update mosaic border (optional) + # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) + # dataset.mosaic_border = [b - imgsz, -b] # height, width borders + + mloss = torch.zeros(4, device=device) # mean losses + if RANK != -1: + train_loader.sampler.set_epoch(epoch) + pbar = enumerate(train_loader) + LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + if RANK in {-1, 0}: + pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar + 
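+        # Zero once at the start of the epoch; inside the loop, gradients are only
+        # cleared after an optimizer step, so batches accumulate gradients until
+        # ni - last_opt_step >= accumulate (emulating the nominal batch size nbs=64).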
optimizer.zero_grad() + for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- + ni = i + nb * epoch # number integrated batches (since train start) + imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 + + # Warmup + if ni <= nw: + xi = [0, nw] # x interp + # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) + accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) + for j, x in enumerate(optimizer.param_groups): + # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 + x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) + if 'momentum' in x: + x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + + # Multi-scale + if opt.multi_scale: + sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sf = sz / max(imgs.shape[2:]) # scale factor + if sf != 1: + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) + imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + + # Forward + with torch.cuda.amp.autocast(amp): + pred = model(imgs) # forward + loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) # loss scaled by batch_size + if RANK != -1: + loss *= WORLD_SIZE # gradient averaged between devices in DDP mode + if opt.quad: + loss *= 4. + + # Backward + scaler.scale(loss).backward() + + # Optimize + if ni - last_opt_step >= accumulate: + scaler.step(optimizer) # optimizer.step + scaler.update() + optimizer.zero_grad() + if ema: + ema.update(model) + last_opt_step = ni + + # Log + if RANK in {-1, 0}: + mloss = (mloss * i + loss_items) / (i + 1) # update mean losses + mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) + pbar.set_description(("%10s" * 2 + "%10.4g" * 6) + % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) + # for plots + if mask_ratio != 1: + masks = F.interpolate( + masks[None, :].float(), + (imgsz, imgsz), + mode="bilinear", + align_corners=False, + ).squeeze(0) + #callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) + if plots: + if ni < 3: + f = save_dir / f"train_batch{ni}.jpg" # filename + plot_images_and_masks(imgs, targets, masks, paths, f) + + if ni == 10: + files = sorted(save_dir.glob('train*.jpg')) + logger.log_images(files, "Mosaics") + # end batch ------------------------------------------------------------------------------------------------ + + # Scheduler + lr = [x['lr'] for x in optimizer.param_groups] # for loggers + scheduler.step() + + if RANK in {-1, 0}: + # mAP + # callbacks.run('on_train_epoch_end', epoch=epoch) + ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) + final_epoch = (epoch + 1 == epochs) or stopper.possible_stop + if not noval or final_epoch: # Calculate mAP + results, maps, _ = val.run(data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=ema.ema, + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + plots=plots, + #callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) + # Update best mAP + fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + stop = stopper(epoch=epoch, fitness=fi) # early stop check 
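+            # Track the best fitness seen so far; best.pt below is only written
+            # when this epoch's fi equals best_fitness.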
+ if fi > best_fitness: + best_fitness = fi + log_vals = list(mloss) + list(results) + lr + metrics_dict = dict(zip(KEYS, log_vals)) + logger.log_metrics(metrics_dict, epoch) + #callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) + + # Save model + if (not nosave) or (final_epoch and not evolve): # if save + ckpt = { + 'epoch': epoch, + 'best_fitness': best_fitness, + 'model': deepcopy(de_parallel(model)).half(), + 'ema': deepcopy(ema.ema).half(), + 'updates': ema.updates, + 'optimizer': optimizer.state_dict(), + #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + 'date': datetime.now().isoformat()} + + # Save last, best and delete + torch.save(ckpt, last) + if best_fitness == fi: + torch.save(ckpt, best) + if opt.save_period > 0 and epoch % opt.save_period == 0: + torch.save(ckpt, w / f'epoch{epoch}.pt') + logger.log_model(w / f'epoch{epoch}.pt') + del ckpt + + + # EarlyStopping + if RANK != -1: # if DDP training + broadcast_list = [stop if RANK == 0 else None] + dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks + if RANK != 0: + stop = broadcast_list[0] + if stop: + break # must break all DDP ranks + + # end epoch ---------------------------------------------------------------------------------------------------- + # end training ----------------------------------------------------------------------------------------------------- + if RANK in {-1, 0}: + LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') + for f in last, best: + if f.exists(): + strip_optimizer(f) # strip optimizers + if f is best: + LOGGER.info(f'\nValidating {f}...') + results, _, _ = val.run( + data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=attempt_load(f, device).half(), + iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + save_json=is_coco, + verbose=True, + plots=plots, + #callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) # val best model with plots + if is_coco: + metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) + logger.log_metrics(metrics_dict, epoch) + #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) + # on train end callback using genericLogger + logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) + if plots: + plot_results_with_masks(file=save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") + logger.log_images(files) + # callbacks.run('on_train_end', last, best, plots, epoch, results) + + torch.cuda.empty_cache() + return results + + + +def parse_opt(known=False): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') + parser.add_argument('--cfg', type=str, default='', help='model.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') + parser.add_argument('--epochs', type=int, default=300) + parser.add_argument('--batch-size', type=int, default=16, 
help='total batch size for all GPUs, -1 for autobatch')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
+    parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
+    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
+    parser.add_argument('--noplots', action='store_true', help='save no plot files')
+    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
+    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
+    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
+    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
+    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
+    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
+    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
+    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
+    parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name')
+    parser.add_argument('--name', default='exp', help='save to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--quad', action='store_true', help='quad dataloader')
+    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
+    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
+    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
+    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
+    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
+    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
+    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
+    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
+    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of a slight accuracy decrease')
+
+    # Weights & Biases arguments
+    parser.add_argument('--entity', default=None, help='W&B: Entity')
+    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
+    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
+    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to
use') + + opt = parser.parse_known_args()[0] if known else parser.parse_args() + return opt + + +def main(opt, callbacks=Callbacks()): + # Checks + if RANK in {-1, 0}: + print_args(vars(opt)) + check_git_status() + check_requirements(exclude=['thop']) + + # Resume + if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run + ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path + assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' + with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: + opt = argparse.Namespace(**yaml.safe_load(f)) # replace + opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate + LOGGER.info(f'Resuming training from {ckpt}') + else: + opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ + check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks + assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' + if opt.evolve: + if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve + opt.project = str(ROOT / 'runs/evolve') + opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume + if opt.name == 'cfg': + opt.name = Path(opt.cfg).stem # use model.yaml as name + opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) + + # DDP mode + device = select_device(opt.device, batch_size=opt.batch_size) + if LOCAL_RANK != -1: + msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' + assert not opt.image_weights, f'--image-weights {msg}' + assert not opt.evolve, f'--evolve {msg}' + assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' + assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + torch.cuda.set_device(LOCAL_RANK) + device = torch.device('cuda', LOCAL_RANK) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + # Train + if not opt.evolve: + train(opt.hyp, opt, device, callbacks) + if WORLD_SIZE > 1 and RANK == 0: + LOGGER.info('Destroying process group... 
') + dist.destroy_process_group() + + # Evolve hyperparameters (optional) + else: + # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) + meta = { + 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) + 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) + 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 + 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay + 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) + 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum + 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr + 'box': (1, 0.02, 0.2), # box loss gain + 'cls': (1, 0.2, 4.0), # cls loss gain + 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight + 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) + 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight + 'iou_t': (0, 0.1, 0.7), # IoU training threshold + 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold + 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) + 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) + 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) + 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) + 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) + 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) + 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) + 'scale': (1, 0.0, 0.9), # image scale (+/- gain) + 'shear': (1, 0.0, 10.0), # image shear (+/- deg) + 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 + 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) + 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) + 'mosaic': (1, 0.0, 1.0), # image mixup (probability) + 'mixup': (1, 0.0, 1.0), # image mixup (probability) + 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) + + with open(opt.hyp, errors='ignore') as f: + hyp = yaml.safe_load(f) # load hyps dict + if 'anchors' not in hyp: # anchors commented in hyp.yaml + hyp['anchors'] = 3 + opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch + # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices + evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' + if opt.bucket: + os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists + + for _ in range(opt.evolve): # generations to evolve + if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate + # Select parent(s) + parent = 'single' # parent selection method: 'single' or 'weighted' + x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) + n = min(5, len(x)) # number of previous results to consider + x = x[np.argsort(-fitness(x))][:n] # top n mutations + w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) + if parent == 'single' or len(x) == 1: + # x = x[random.randint(0, n - 1)] # random selection + x = x[random.choices(range(n), weights=w)[0]] # weighted selection + elif parent == 'weighted': + x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination + + # Mutate + mp, s = 0.8, 0.2 # mutation probability, sigma + npr = np.random + npr.seed(int(time.time())) + g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 + ng = len(meta) + v = np.ones(ng) + while all(v == 1): # mutate until a change occurs (prevent duplicates) + v = (g * (npr.random(ng) < mp) * 
npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) + for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) + hyp[k] = float(x[i + 7] * v[i]) # mutate + + # Constrain to limits + for k, v in meta.items(): + hyp[k] = max(hyp[k], v[1]) # lower limit + hyp[k] = min(hyp[k], v[2]) # upper limit + hyp[k] = round(hyp[k], 5) # significant digits + + # Train mutation + results = train(hyp.copy(), opt, device, callbacks) + callbacks = Callbacks() + # Write mutation results + print_mutation(results, hyp.copy(), save_dir, opt.bucket) + + # Plot results + plot_evolve(evolve_csv) + LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' + f"Results saved to {colorstr('bold', save_dir)}\n" + f'Usage example: $ python train.py --hyp {evolve_yaml}') + + +def run(**kwargs): + # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') + opt = parse_opt(True) + for k, v in kwargs.items(): + setattr(opt, k, v) + main(opt) + return opt + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 6e3696718b6b..e82bfc74a1bc 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -5,6 +5,7 @@ import os import warnings +from pathlib import Path import pkg_resources as pkg import torch @@ -285,3 +286,81 @@ def on_train_end(self, last, best, plots, epoch, results): name=f'run_{self.wandb.wandb_run.id}_model', aliases=['latest', 'best', 'stripped']) self.wandb.finish_run() + +class GenericLogger: + """ + YOLOv5 General purpose logger for non-task specific logging + Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...) + Arguments + opt: Run arguments + console_logger: Console logger + include: loggers to include + """ + + def __init__(self, opt, console_logger, include=('tb', 'wandb')): + # init default loggers + self.save_dir = Path(opt.save_dir) + self.include = include + self.console_logger = console_logger + if 'tb' in self.include: + prefix = colorstr('TensorBoard: ') + self.console_logger.info( + f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/") + self.tb = SummaryWriter(str(self.save_dir)) + + if wandb and 'wandb' in self.include: + self.wandb = wandb.init(project="YOLOv5" if opt.project == "runs/train_segment" else opt.project, + name=None if opt.name == "exp" else opt.name, + config=opt) + else: + self.wandb = None + + def log_metrics(self, metrics_dict, epoch): + # Log metrics dictionary to all loggers + if self.tb: + for k, v in metrics_dict.items(): + self.tb.add_scalar(k, v, epoch) + + if self.wandb: + self.wandb.log(metrics_dict, step=epoch) + + def log_images(self, files, name='Images', epoch=0): + # Log images to all loggers + files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path + files = [f for f in files if f.exists()] # filter by exists + + if self.tb: + for f in files: + self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') + + if self.wandb: + self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch) + + def log_graph(self, model, imgsz=(640, 640)): + # Log model graph to all loggers + if self.tb: + log_tensorboard_graph(self.tb, model, imgsz) + + def log_model(self, model_path, epoch=0, metadata={}): + # Log model to all loggers + if self.wandb: + art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata) + art.add_file(str(model_path)) + 
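+            # Logging under the fixed name f"run_{wandb.run.id}_model" makes W&B
+            # version the artifact, so each saved checkpoint remains retrievable.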
wandb.log_artifact(art) + + def update_params(self, params): + # Update the paramters logged + if self.wandb: + wandb.run.config.update(params, allow_val_change=True) + +def log_tensorboard_graph(tb, model, imgsz=(640, 640)): + # Log model graph to TensorBoard + try: + p = next(model.parameters()) # for device, type + imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz # expand + im = torch.empty((1, 3, *imgsz)).to(p.device).type_as(p) # input image + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress jit trace warning + tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), []) + except Exception: + print('WARNING: TensorBoard graph visualization failure') \ No newline at end of file diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index a3c0acd23920..65e3011f9f12 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -144,3 +144,37 @@ def get_maps(self, nc): def ap_class_index(self): # boxes and masks have the same ap_class_index return self.metric_box.ap_class_index + +KEYS = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2", + ] + +BEST_KEYS = [ + "best/epoch", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)", + ] \ No newline at end of file From 3e2312d7b748d0e8dbc9813d14a2edbc4671bc12 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:04:53 +0530 Subject: [PATCH 062/247] no deterministic behaviour --- utils/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/general.py b/utils/general.py index 040da33c85f3..0846278083cf 100644 --- a/utils/general.py +++ b/utils/general.py @@ -202,7 +202,7 @@ def init_seeds(seed=0, deterministic=False): import torch.backends.cudnn as cudnn if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 - torch.use_deterministic_algorithms(True) + #torch.use_deterministic_algorithms(True) os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' os.environ['PYTHONHASHSEED'] = str(seed) From aead6e92aa7f0a246da74bb9bf650d454907c89d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:27:19 +0530 Subject: [PATCH 063/247] update --- segment/train_temp.py | 7 ++++--- segment/val.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/segment/train_temp.py b/segment/train_temp.py index 57c23811cf53..b1293387cb63 100644 --- a/segment/train_temp.py +++ b/segment/train_temp.py @@ -324,7 +324,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) @@ -430,10 +429,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr + # Log val metrics 
and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) - #callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) - + if plots: + files = sorted(save_dir.glob('val*.jpg')) + logger.log_images(files, "Validation") # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { diff --git a/segment/val.py b/segment/val.py index 1045a3959793..a301f636fb7d 100644 --- a/segment/val.py +++ b/segment/val.py @@ -372,7 +372,7 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - callbacks.run('on_val_end') + #callbacks.run('on_val_end') # in case the cocoeval will update map ( From 31670a20b6cbd3205a37290a314bf99024735857 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:41:34 +0530 Subject: [PATCH 064/247] update --- segment/train_temp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/segment/train_temp.py b/segment/train_temp.py index b1293387cb63..91168d533320 100644 --- a/segment/train_temp.py +++ b/segment/train_temp.py @@ -497,12 +497,14 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) + if not opt.evolve: + logger.log_model(best, epoch+1) if plots: plot_results_with_masks(file=save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files) + logger.log_images(files, "Results") # callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() From 8a83f6509d136274b6eec289e56b7e948115980f Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:52:11 +0530 Subject: [PATCH 065/247] update train --- segment/train.py | 80 +++-- segment/train_temp.py | 711 ------------------------------------------ 2 files changed, 46 insertions(+), 745 deletions(-) delete mode 100644 segment/train_temp.py diff --git a/segment/train.py b/segment/train.py index ea3ca58316c0..5986aa4278ad 100644 --- a/segment/train.py +++ b/segment/train.py @@ -50,7 +50,7 @@ check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import LoggersMask +from utils.loggers import GenericLogger from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness @@ -63,6 +63,8 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) from utils.general import LOGGER, check_amp, check_version from utils.autobatch import check_train_batch_size +from utils.segment.plots import plot_images_and_masks, plot_results_with_masks +from utils.segment.metrics import KEYS, BEST_KEYS from torch.optim import AdamW import yaml from datetime import datetime @@ -71,7 +73,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ Path(opt.save_dir), opt.epochs, 
opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio - callbacks.run('on_pretrain_routine_start') # Directories w = save_dir / 'weights' # weights dir @@ -94,13 +95,13 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Loggers data_dict = None if RANK in {-1, 0}: - loggers = LoggersMask( - save_dir=save_dir, opt=opt, logger=LOGGER + logger = GenericLogger( + opt=opt, console_logger=LOGGER ) # loggers instance # Register actions - for k in methods(loggers): - callbacks.register_action(k, callback=getattr(loggers, k)) + # for k in methods(loggers): + # callbacks.register_action(k, callback=getattr(loggers, k)) # Config plots = not evolve and not opt.noplots # create plots @@ -147,7 +148,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Batch size if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) - loggers.on_params_update({"batch_size": batch_size}) + logger.update_params({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size @@ -278,8 +279,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) model.half().float() # pre-reduce anchor precision - callbacks.run('on_pretrain_routine_end') - # DDP mode if cuda and RANK != -1: if check_version(torch.__version__, '1.11.0'): @@ -309,13 +308,11 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model, overlap=overlap) # init loss class - callbacks.run('on_train_start') LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ - callbacks.run('on_train_epoch_start') model.train() # Update image weights (optional, single-GPU only) @@ -327,7 +324,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) @@ -337,7 +333,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- - callbacks.run('on_train_batch_start') ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 @@ -395,10 +390,15 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio mode="bilinear", align_corners=False, ).squeeze(0) - callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) - - if callbacks.stop_training: - return + #callbacks.run('on_train_batch_end', ni, model, 
imgs, targets, masks, paths, plots) + if plots: + if ni < 3: + f = save_dir / f"train_batch{ni}.jpg" # filename + plot_images_and_masks(imgs, targets, masks, paths, f) + + if ni == 10: + files = sorted(save_dir.glob('train*.jpg')) + logger.log_images(files, "Mosaics") # end batch ------------------------------------------------------------------------------------------------ # Scheduler @@ -407,7 +407,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: # mAP - callbacks.run('on_train_epoch_end', epoch=epoch) + # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP @@ -419,7 +419,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio dataloader=val_loader, save_dir=save_dir, plots=plots, - callbacks=callbacks, + #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) @@ -429,8 +429,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr - callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) - + # Log val metrics and media + metrics_dict = dict(zip(KEYS, log_vals)) + logger.log_metrics(metrics_dict, epoch) + if plots: + files = sorted(save_dir.glob('val*.jpg')) + logger.log_images(files, "Validation") # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { @@ -449,8 +453,9 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio torch.save(ckpt, best) if opt.save_period > 0 and epoch % opt.save_period == 0: torch.save(ckpt, w / f'epoch{epoch}.pt') + logger.log_model(w / f'epoch{epoch}.pt') del ckpt - callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) + # EarlyStopping if RANK != -1: # if DDP training @@ -482,14 +487,25 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio save_json=is_coco, verbose=True, plots=plots, - callbacks=callbacks, + #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: - callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) - - callbacks.run('on_train_end', last, best, plots, epoch, results) + metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) + logger.log_metrics(metrics_dict, epoch) + #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) + # on train end callback using genericLogger + logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) + if not opt.evolve: + logger.log_model(best, epoch+1) + if plots: + plot_results_with_masks(file=save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") + logger.log_images(files, "Results") + # callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() return results @@ -521,7 +537,7 @@ def parse_opt(known=False): parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') 
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
     parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
-    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
+    parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name')
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--quad', action='store_true', help='quad dataloader')
@@ -532,15 +548,11 @@ def parse_opt(known=False):
     parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
     parser.add_argument('--seed', type=int, default=0, help='Global training seed')
     parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
+
+    # Instance Segmentation Args
+    parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to save memory')
     parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of a slight accuracy decrease')
 
-    # Weights & Biases arguments
-    parser.add_argument('--entity', default=None, help='W&B: Entity')
-    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
-    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
-    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
-
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt
 
diff --git a/segment/train_temp.py b/segment/train_temp.py
deleted file mode 100644
index 91168d533320..000000000000
--- a/segment/train_temp.py
+++ /dev/null
@@ -1,711 +0,0 @@
-# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
-"""
-Train a YOLOv5 model on a custom dataset.
-
-Models and datasets download automatically from the latest YOLOv5 release.
-Models: https://github.com/ultralytics/yolov5/tree/master/models -Datasets: https://github.com/ultralytics/yolov5/tree/master/data -Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data - -Usage: - $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED) - $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch -""" - -import argparse -import math -import os -import random -import sys -import time -from copy import deepcopy -from datetime import datetime -from pathlib import Path - -import val # for end-of-epoch mAP -import numpy as np -import torch -import torch.distributed as dist -import torch.nn as nn -import yaml -from torch.nn.parallel import DistributedDataParallel as DDP -import torch.nn.functional as F -from torch.optim import SGD, Adam, AdamW, lr_scheduler -from tqdm import tqdm - -FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] # YOLOv5 root directory -if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative - -from models.experimental import attempt_load -from models.yolo import Model -from utils.autoanchor import check_anchors -from utils.autobatch import check_train_batch_size -from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader -from utils.downloads import attempt_download -from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, - check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, - increment_path, init_seeds, intersect_dicts, labels_to_class_weights, - labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import GenericLogger -from utils.loggers.wandb.wandb_utils import check_wandb_resume -from utils.segment.loss import ComputeLoss -from utils.segment.metrics import fitness -from utils.plots import plot_evolve, plot_labels -from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first - - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -from utils.general import LOGGER, check_amp, check_version -from utils.autobatch import check_train_batch_size -from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -from utils.segment.metrics import KEYS, BEST_KEYS -from torch.optim import AdamW -import yaml -from datetime import datetime - -def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ - Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ - opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio - - # Directories - w = save_dir / 'weights' # weights dir - (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir - last, best = w / 'last.pt', w / 'best.pt' - - # Hyperparameters - if isinstance(hyp, str): - with open(hyp, errors='ignore') as f: - hyp = yaml.safe_load(f) # load hyps dict - LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) - - # Save run settings - if not evolve: - with open(save_dir / 'hyp.yaml', 
'w') as f: - yaml.safe_dump(hyp, f, sort_keys=False) - with open(save_dir / 'opt.yaml', 'w') as f: - yaml.safe_dump(vars(opt), f, sort_keys=False) - - # Loggers - data_dict = None - if RANK in {-1, 0}: - logger = GenericLogger( - opt=opt, console_logger=LOGGER - ) # loggers instance - - # Register actions - # for k in methods(loggers): - # callbacks.register_action(k, callback=getattr(loggers, k)) - - # Config - plots = not evolve and not opt.noplots # create plots - overlap = opt.overlap_mask - cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK, True) - with torch_distributed_zero_first(LOCAL_RANK): - data_dict = data_dict or check_dataset(data) # check if None - train_path, val_path = data_dict['train'], data_dict['val'] - nc = 1 if single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names - assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check - is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset - - # Model - check_suffix(weights, '.pt') # check weights - pretrained = weights.endswith('.pt') - if pretrained: - with torch_distributed_zero_first(LOCAL_RANK): - weights = attempt_download(weights) # download if not found locally - ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak - model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create - exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys - csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 - csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect - model.load_state_dict(csd, strict=False) # load - LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report - else: - model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create - amp = check_amp(model) # check AMP - - # Freeze - freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze - for k, v in model.named_parameters(): - v.requires_grad = True # train all layers - if any(x in k for x in freeze): - LOGGER.info(f'freezing {k}') - v.requires_grad = False - - # Image size - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple - - # Batch size - if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size - batch_size = check_train_batch_size(model, imgsz, amp) - logger.update_params({"batch_size": batch_size}) - - # Optimizer - nbs = 64 # nominal batch size - accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing - hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay - LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") - - g = [], [], [] # optimizer parameter groups - bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() - for v in model.modules(): - if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias - g[2].append(v.bias) - if isinstance(v, bn): # weight (no decay) - g[1].append(v.weight) - elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) - g[0].append(v.weight) - - # hyp['lr0'] = hyp['lr0'] / batch_size * 128 - # hyp['warmup_bias_lr'] = 0.01 - if opt.optimizer == 'Adam': - optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - elif opt.optimizer == 'AdamW': - optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - else: - optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - - optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay - optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) - LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " - f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") - del g - - # Scheduler - if opt.cos_lr: - lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] - else: - lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear - scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) - - # EMA - ema = ModelEMA(model) if RANK in {-1, 0} else None - - # Resume - start_epoch, best_fitness = 0, 0.0 - if pretrained: - # Optimizer - if ckpt['optimizer'] is not None: - optimizer.load_state_dict(ckpt['optimizer']) - best_fitness = ckpt['best_fitness'] - - # EMA - if ema and ckpt.get('ema'): - ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) - ema.updates = ckpt['updates'] - - # Epochs - start_epoch = ckpt['epoch'] + 1 - if resume: - assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' - if epochs < start_epoch: - LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.") - epochs += ckpt['epoch'] # finetune additional epochs - - del ckpt, csd - - # DP mode - if cuda and RANK == -1 and torch.cuda.device_count() > 1: - LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n' - 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') - model = torch.nn.DataParallel(model) - - # SyncBatchNorm - if opt.sync_bn and cuda and RANK != -1: - model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) - LOGGER.info('Using SyncBatchNorm()') - - # Trainloader - train_loader, dataset = create_dataloader(train_path, - imgsz, - batch_size // WORLD_SIZE, - gs, - single_cls, - hyp=hyp, - augment=True, - cache=None if opt.cache == 'val' else opt.cache, - rect=opt.rect, - rank=LOCAL_RANK, - workers=workers, - image_weights=opt.image_weights, - quad=opt.quad, - prefix=colorstr('train: '), - shuffle=True, - mask_downsample_ratio=mask_ratio, - overlap_mask=overlap, - ) - mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class - print("mlc , nc ", mlc, " ", nc ) - nb = len(train_loader) # number of batches - assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. 
Possible class labels are 0-{nc - 1}' - - # Process 0 - if RANK in {-1, 0}: - val_loader = create_dataloader(val_path, - imgsz, - batch_size // WORLD_SIZE * 2, - gs, - single_cls, - hyp=hyp, - cache=None if noval else opt.cache, - rect=True, - rank=-1, - workers=workers * 2, - pad=0.5, - mask_downsample_ratio=mask_ratio, - overlap_mask=overlap, - prefix=colorstr('val: '))[0] - - if not resume: - labels = np.concatenate(dataset.labels, 0) - # c = torch.tensor(labels[:, 0]) # classes - # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency - # model._initialize_biases(cf.to(device)) - if plots: - plot_labels(labels, names, save_dir) - - # Anchors - if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) - model.half().float() # pre-reduce anchor precision - - # DDP mode - if cuda and RANK != -1: - if check_version(torch.__version__, '1.11.0'): - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) - else: - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) - - # Model attributes - nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) - hyp['box'] *= 3 / nl # scale to layers - hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers - hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers - hyp['label_smoothing'] = opt.label_smoothing - model.nc = nc # attach number of classes to model - model.hyp = hyp # attach hyperparameters to model - model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights - model.names = names - - # Start training - t0 = time.time() - nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) - # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training - last_opt_step = -1 - maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) - scheduler.last_epoch = start_epoch - 1 # do not move - scaler = torch.cuda.amp.GradScaler(enabled=amp) - stopper, stop = EarlyStopping(patience=opt.patience), False - compute_loss = ComputeLoss(model, overlap=overlap) # init loss class - LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' - f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' - f"Logging results to {colorstr('bold', save_dir)}\n" - f'Starting training for {epochs} epochs...') - for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ - model.train() - - # Update image weights (optional, single-GPU only) - if opt.image_weights: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx - - # Update mosaic border (optional) - # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) - # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - mloss = torch.zeros(4, device=device) # mean losses - if RANK != -1: - train_loader.sampler.set_epoch(epoch) - pbar = enumerate(train_loader) - LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) - if RANK in {-1, 0}: - pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar - 
optimizer.zero_grad() - for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- - ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 - - # Warmup - if ni <= nw: - xi = [0, nw] # x interp - # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) - accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) - for j, x in enumerate(optimizer.param_groups): - # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 - x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) - if 'momentum' in x: - x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) - - # Multi-scale - if opt.multi_scale: - sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size - sf = sz / max(imgs.shape[2:]) # scale factor - if sf != 1: - ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) - - # Forward - with torch.cuda.amp.autocast(amp): - pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) # loss scaled by batch_size - if RANK != -1: - loss *= WORLD_SIZE # gradient averaged between devices in DDP mode - if opt.quad: - loss *= 4. - - # Backward - scaler.scale(loss).backward() - - # Optimize - if ni - last_opt_step >= accumulate: - scaler.step(optimizer) # optimizer.step - scaler.update() - optimizer.zero_grad() - if ema: - ema.update(model) - last_opt_step = ni - - # Log - if RANK in {-1, 0}: - mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) - pbar.set_description(("%10s" * 2 + "%10.4g" * 6) - % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) - # for plots - if mask_ratio != 1: - masks = F.interpolate( - masks[None, :].float(), - (imgsz, imgsz), - mode="bilinear", - align_corners=False, - ).squeeze(0) - #callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) - if plots: - if ni < 3: - f = save_dir / f"train_batch{ni}.jpg" # filename - plot_images_and_masks(imgs, targets, masks, paths, f) - - if ni == 10: - files = sorted(save_dir.glob('train*.jpg')) - logger.log_images(files, "Mosaics") - # end batch ------------------------------------------------------------------------------------------------ - - # Scheduler - lr = [x['lr'] for x in optimizer.param_groups] # for loggers - scheduler.step() - - if RANK in {-1, 0}: - # mAP - # callbacks.run('on_train_epoch_end', epoch=epoch) - ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) - final_epoch = (epoch + 1 == epochs) or stopper.possible_stop - if not noval or final_epoch: # Calculate mAP - results, maps, _ = val.run(data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz, - model=ema.ema, - single_cls=single_cls, - dataloader=val_loader, - save_dir=save_dir, - plots=plots, - #callbacks=callbacks, - compute_loss=compute_loss, - mask_downsample_ratio=mask_ratio, - overlap=overlap) - # Update best mAP - fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - stop = stopper(epoch=epoch, fitness=fi) # early stop check 
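For reference, the fitness(...) call above reduces the twelve validation results to one scalar used for checkpoint selection and early stopping. A minimal sketch of that reduction follows; the weights shown are an assumption (the detection-style weighting, with mAP@0.5:0.95 dominant, applied to both box and mask metrics), not values read from this patch:

    import numpy as np

    def fitness(x):
        # x: one row of [P, R, mAP@.5, mAP@.5:.95] for boxes, then the same
        # four for masks; trailing val-loss terms are ignored by the slice.
        w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]  # assumed metric weights
        return (np.asarray(x)[:, :8] * w).sum(1)

    fi = fitness(np.zeros((1, 12)))  # higher is better; compared against best_fitness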
- if fi > best_fitness: - best_fitness = fi - log_vals = list(mloss) + list(results) + lr - # Log val metrics and media - metrics_dict = dict(zip(KEYS, log_vals)) - logger.log_metrics(metrics_dict, epoch) - if plots: - files = sorted(save_dir.glob('val*.jpg')) - logger.log_images(files, "Validation") - # Save model - if (not nosave) or (final_epoch and not evolve): # if save - ckpt = { - 'epoch': epoch, - 'best_fitness': best_fitness, - 'model': deepcopy(de_parallel(model)).half(), - 'ema': deepcopy(ema.ema).half(), - 'updates': ema.updates, - 'optimizer': optimizer.state_dict(), - #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, - 'date': datetime.now().isoformat()} - - # Save last, best and delete - torch.save(ckpt, last) - if best_fitness == fi: - torch.save(ckpt, best) - if opt.save_period > 0 and epoch % opt.save_period == 0: - torch.save(ckpt, w / f'epoch{epoch}.pt') - logger.log_model(w / f'epoch{epoch}.pt') - del ckpt - - - # EarlyStopping - if RANK != -1: # if DDP training - broadcast_list = [stop if RANK == 0 else None] - dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks - if RANK != 0: - stop = broadcast_list[0] - if stop: - break # must break all DDP ranks - - # end epoch ---------------------------------------------------------------------------------------------------- - # end training ----------------------------------------------------------------------------------------------------- - if RANK in {-1, 0}: - LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') - for f in last, best: - if f.exists(): - strip_optimizer(f) # strip optimizers - if f is best: - LOGGER.info(f'\nValidating {f}...') - results, _, _ = val.run( - data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz, - model=attempt_load(f, device).half(), - iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 - single_cls=single_cls, - dataloader=val_loader, - save_dir=save_dir, - save_json=is_coco, - verbose=True, - plots=plots, - #callbacks=callbacks, - compute_loss=compute_loss, - mask_downsample_ratio=mask_ratio, - overlap=overlap) # val best model with plots - if is_coco: - metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) - logger.log_metrics(metrics_dict, epoch) - #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) - # on train end callback using genericLogger - logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) - if not opt.evolve: - logger.log_model(best, epoch+1) - if plots: - plot_results_with_masks(file=save_dir / 'results.csv') # save results.png - files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] - files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter - LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files, "Results") - # callbacks.run('on_train_end', last, best, plots, epoch, results) - - torch.cuda.empty_cache() - return results - - - -def parse_opt(known=False): - parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') - parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') - 
parser.add_argument('--epochs', type=int, default=300)
-    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
-    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
-    parser.add_argument('--rect', action='store_true', help='rectangular training')
-    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
-    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
-    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
-    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
-    parser.add_argument('--noplots', action='store_true', help='save no plot files')
-    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
-    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
-    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
-    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
-    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
-    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
-    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
-    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
-    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
-    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
-    parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name')
-    parser.add_argument('--name', default='exp', help='save to project/name')
-    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
-    parser.add_argument('--quad', action='store_true', help='quad dataloader')
-    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
-    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
-    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
-    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
-    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
-    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
-    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-    parser.add_argument('--mask-ratio', type=int, default=1, help='Downsample the gt masks to save memory')
-    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of slight accuracy decrease')
-
-    # Weights & Biases arguments
-    parser.add_argument('--entity', default=None, help='W&B: Entity')
-    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
-    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
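The --mask-ratio flag above trades mask resolution for memory: ground-truth masks are kept at 1/r scale during training and upsampled only when full resolution is needed, as the plotting path in the training loop does. A minimal round-trip sketch with illustrative shapes (not code from this patch):

    import torch
    import torch.nn.functional as F

    masks = torch.rand(16, 640, 640)  # hypothetical batch of full-size GT masks
    r = 4  # --mask-ratio
    small = F.interpolate(masks[None], scale_factor=1 / r, mode='bilinear',
                          align_corners=False).squeeze(0)  # (16, 160, 160), ~r^2 less memory
    back = F.interpolate(small[None], size=(640, 640), mode='bilinear',
                         align_corners=False).squeeze(0)  # restored only for plotting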
- parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') - - opt = parser.parse_known_args()[0] if known else parser.parse_args() - return opt - - -def main(opt, callbacks=Callbacks()): - # Checks - if RANK in {-1, 0}: - print_args(vars(opt)) - check_git_status() - check_requirements(exclude=['thop']) - - # Resume - if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' - with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: - opt = argparse.Namespace(**yaml.safe_load(f)) # replace - opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate - LOGGER.info(f'Resuming training from {ckpt}') - else: - opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ - check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks - assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' - if opt.evolve: - if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve - opt.project = str(ROOT / 'runs/evolve') - opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume - if opt.name == 'cfg': - opt.name = Path(opt.cfg).stem # use model.yaml as name - opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) - - # DDP mode - device = select_device(opt.device, batch_size=opt.batch_size) - if LOCAL_RANK != -1: - msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' - assert not opt.image_weights, f'--image-weights {msg}' - assert not opt.evolve, f'--evolve {msg}' - assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' - assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' - assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - torch.cuda.set_device(LOCAL_RANK) - device = torch.device('cuda', LOCAL_RANK) - dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") - - # Train - if not opt.evolve: - train(opt.hyp, opt, device, callbacks) - if WORLD_SIZE > 1 and RANK == 0: - LOGGER.info('Destroying process group... 
') - dist.destroy_process_group() - - # Evolve hyperparameters (optional) - else: - # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) - meta = { - 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) - 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) - 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 - 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay - 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) - 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum - 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr - 'box': (1, 0.02, 0.2), # box loss gain - 'cls': (1, 0.2, 4.0), # cls loss gain - 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight - 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) - 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight - 'iou_t': (0, 0.1, 0.7), # IoU training threshold - 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold - 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) - 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) - 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) - 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) - 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) - 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) - 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) - 'scale': (1, 0.0, 0.9), # image scale (+/- gain) - 'shear': (1, 0.0, 10.0), # image shear (+/- deg) - 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 - 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) - 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) - 'mosaic': (1, 0.0, 1.0), # image mixup (probability) - 'mixup': (1, 0.0, 1.0), # image mixup (probability) - 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) - - with open(opt.hyp, errors='ignore') as f: - hyp = yaml.safe_load(f) # load hyps dict - if 'anchors' not in hyp: # anchors commented in hyp.yaml - hyp['anchors'] = 3 - opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch - # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices - evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' - if opt.bucket: - os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists - - for _ in range(opt.evolve): # generations to evolve - if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate - # Select parent(s) - parent = 'single' # parent selection method: 'single' or 'weighted' - x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) - n = min(5, len(x)) # number of previous results to consider - x = x[np.argsort(-fitness(x))][:n] # top n mutations - w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) - if parent == 'single' or len(x) == 1: - # x = x[random.randint(0, n - 1)] # random selection - x = x[random.choices(range(n), weights=w)[0]] # weighted selection - elif parent == 'weighted': - x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination - - # Mutate - mp, s = 0.8, 0.2 # mutation probability, sigma - npr = np.random - npr.seed(int(time.time())) - g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 - ng = len(meta) - v = np.ones(ng) - while all(v == 1): # mutate until a change occurs (prevent duplicates) - v = (g * (npr.random(ng) < mp) * 
npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) - for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) - hyp[k] = float(x[i + 7] * v[i]) # mutate - - # Constrain to limits - for k, v in meta.items(): - hyp[k] = max(hyp[k], v[1]) # lower limit - hyp[k] = min(hyp[k], v[2]) # upper limit - hyp[k] = round(hyp[k], 5) # significant digits - - # Train mutation - results = train(hyp.copy(), opt, device, callbacks) - callbacks = Callbacks() - # Write mutation results - print_mutation(results, hyp.copy(), save_dir, opt.bucket) - - # Plot results - plot_evolve(evolve_csv) - LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' - f"Results saved to {colorstr('bold', save_dir)}\n" - f'Usage example: $ python train.py --hyp {evolve_yaml}') - - -def run(**kwargs): - # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') - opt = parse_opt(True) - for k, v in kwargs.items(): - setattr(opt, k, v) - main(opt) - return opt - - -if __name__ == "__main__": - opt = parse_opt() - main(opt) From b17237ecf4206e35df36e2bf55dbb5c9d88812b1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:54:38 +0530 Subject: [PATCH 066/247] torevert: use newlabels --- data/coco.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index 0c0c4adab05d..b354236e25a0 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -8,7 +8,7 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: ../datasets/coco # dataset root dir +path: /datasets/coco # dataset root dir train: train2017.txt # train images (relative to 'path') 118287 images val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 From a5aa7f7ad407ba6d6a6588fbc96cc9e49b4dbd71 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 20:57:11 +0530 Subject: [PATCH 067/247] update --- data/coco.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index b354236e25a0..bb9cb849abc9 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -8,7 +8,7 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
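The path key changed below is the dataset root against which the train/val/test entries resolve. A minimal sketch of how a consumer reads such a YAML (simplified; assumes PyYAML and a single .txt entry per split, as in this file):

    import yaml
    from pathlib import Path

    data = yaml.safe_load(Path('data/coco.yaml').read_text())
    root = Path(data['path'])  # dataset root dir
    train, val = root / data['train'], root / data['val']  # e.g. root/train2017.txt
    print(train, val)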
-path: /datasets/coco # dataset root dir +path: datasets/coco # dataset root dir train: train2017.txt # train images (relative to 'path') 118287 images val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 From d67711a20a0485beb4fac9b60f0b37d6e6930d8b Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 21:04:19 +0530 Subject: [PATCH 068/247] cleanup --- utils/loggers/__init__.py | 97 --------------------------------------- 1 file changed, 97 deletions(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index e82bfc74a1bc..a0a6b063bb59 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -190,103 +190,6 @@ def on_params_update(self, params): self.wandb.wandb_run.config.update(params, allow_val_change=True) -class LoggersMask(Loggers): - def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): - super().__init__(save_dir, weights, opt, hyp, logger, include) - self.keys = [ - "train/box_loss", - "train/seg_loss", # train loss - "train/obj_loss", - "train/cls_loss", - "metrics/precision(B)", - "metrics/recall(B)", - "metrics/mAP_0.5(B)", - "metrics/mAP_0.5:0.95(B)", # metrics - "metrics/precision(M)", - "metrics/recall(M)", - "metrics/mAP_0.5(M)", - "metrics/mAP_0.5:0.95(M)", # metrics - "val/box_loss", - "val/seg_loss", # val loss - "val/obj_loss", - "val/cls_loss", - "x/lr0", - "x/lr1", - "x/lr2", - ] # params - self.best_keys = [ - "best/epoch", - "best/precision(B)", - "best/recall(B)", - "best/mAP_0.5(B)", - "best/mAP_0.5:0.95(B)", - "best/precision(M)", - "best/recall(M)", - "best/mAP_0.5(M)", - "best/mAP_0.5:0.95(M)", - ] - - - def on_train_batch_end(self, ni, model, imgs, targets, masks, paths, plots): - if plots: - if ni == 0: - if self.tb and not self.opt.sync_bn: # --sync known issue https://github.com/ultralytics/yolov5/issues/3754 - with warnings.catch_warnings(): - warnings.simplefilter('ignore') # suppress jit trace warning - self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) - if ni < 3: - f = self.save_dir / f"train_batch{ni}.jpg" # filename - plot_images_and_masks(imgs, targets, masks, paths, f) - - if self.wandb and ni == 10: - files = sorted(self.save_dir.glob('train*.jpg')) - self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) - - def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): - # Callback runs at the end of each fit (train+val) epoch - x = dict(zip(self.keys, vals)) - if self.csv: - file = self.save_dir / 'results.csv' - n = len(x) + 1 # number of cols - s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header - with open(file, 'a') as f: - f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') - - if self.tb: - for k, v in x.items(): - self.tb.add_scalar(k, v, epoch) - - if self.wandb: - if best_fitness == fi: - best_results = [epoch] + vals[4:12] - for i, name in enumerate(self.best_keys): - self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary - self.wandb.log(x) - self.wandb.end_epoch(best_result=best_fitness == fi) - - def on_train_end(self, last, best, plots, epoch, results): - # Callback runs on training end - if plots: - plot_results_with_masks(file=self.save_dir / 'results.csv') # save results.png - files = ['results.png', 'confusion_matrix.png', 
*(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] - files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter - self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}") - - if self.tb: - for f in files: - self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') - - if self.wandb: - self.wandb.log(dict(zip(self.keys[4:16], results))) - self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) - # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model - if not self.opt.evolve: - wandb.log_artifact(str(best if best.exists() else last), - type='model', - name=f'run_{self.wandb.wandb_run.id}_model', - aliases=['latest', 'best', 'stripped']) - self.wandb.finish_run() - class GenericLogger: """ YOLOv5 General purpose logger for non-task specific logging From 3de63fb51337bbbd45b4d19d0d7934f2f45ea0b1 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 21:48:06 +0530 Subject: [PATCH 069/247] update --- segment/detect.py | 6 +- segment/predict.py | 279 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+), 3 deletions(-) create mode 100644 segment/predict.py diff --git a/segment/detect.py b/segment/detect.py index c751e39a06b8..2eac4e46321f 100644 --- a/segment/detect.py +++ b/segment/detect.py @@ -89,7 +89,7 @@ def run( # Load model device = select_device(device) - model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=True) + model = attempt_load(weights, device=device, inplace=True, fuse=True) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if half else model.float() @@ -181,7 +181,7 @@ def run( annotator.im = img_masks # Write results - for i, (*xyxy, conf, cls) in enumerate(det): + for j, (*xyxy, conf, cls) in enumerate(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format @@ -191,7 +191,7 @@ def run( if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(i, True)) + annotator.box_label(xyxy, label, color=colors(j, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) diff --git a/segment/predict.py b/segment/predict.py new file mode 100644 index 000000000000..dd45690266ec --- /dev/null +++ b/segment/predict.py @@ -0,0 +1,279 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Run inference on images, videos, directories, streams, etc. 
+
+Usage - sources:
+    $ python path/to/predict.py --weights yolov5s.pt --source 0  # webcam
+                                                      img.jpg  # image
+                                                      vid.mp4  # video
+                                                      path/  # directory
+                                                      path/*.jpg  # glob
+                                                      'https://youtu.be/Zgi9g1ksQHc'  # YouTube
+                                                      'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
+
+Usage - formats:
+    $ python path/to/predict.py --weights yolov5s.pt  # PyTorch
+                                          yolov5s.torchscript  # TorchScript
+                                          yolov5s.onnx  # ONNX Runtime or OpenCV DNN with --dnn
+                                          yolov5s.xml  # OpenVINO
+                                          yolov5s.engine  # TensorRT
+                                          yolov5s.mlmodel  # CoreML (macOS-only)
+                                          yolov5s_saved_model  # TensorFlow SavedModel
+                                          yolov5s.pb  # TensorFlow GraphDef
+                                          yolov5s.tflite  # TensorFlow Lite
+                                          yolov5s_edgetpu.tflite  # TensorFlow Edge TPU
+"""
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+import torch
+import torch.backends.cudnn as cudnn
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+from models.experimental import attempt_load
+from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
+from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
+                           increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
+from utils.segment.plots import plot_masks
+from utils.torch_utils import select_device, time_sync
+from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample
+
+
+@torch.no_grad()
+def run(
+        weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
+        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
+        imgsz=(640, 640),  # inference size (height, width)
+        conf_thres=0.25,  # confidence threshold
+        iou_thres=0.45,  # NMS IOU threshold
+        max_det=1000,  # maximum detections per image
+        device='',  # cuda device, i.e. 
0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict_segment', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference +): + source = str(source) + save_img = not nosave and not source.endswith('.txt') # save inference images + is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) + is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) + webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) + if is_url and is_file: + source = check_file(source) # download + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + device = select_device(device) + model = attempt_load(weights, device=device, inplace=True, fuse=True) + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if half else model.float() + pt = True + imgsz = check_img_size(imgsz, s=stride) # check image size + + # Dataloader + if webcam: + view_img = check_imshow() + cudnn.benchmark = True # set True to speed up constant image size inference + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) + bs = len(dataset) # batch_size + else: + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + bs = 1 # batch_size + vid_path, vid_writer = [None] * bs, [None] * bs + + # Run inference + if str(device) != "cpu": + im = torch.zeros(1, 3, *imgsz).to(device).half() # input image + model(im) # warmup + seen, windows, dt = 0, [], [0.0, 0.0, 0.0] + for path, im, im0s, vid_cap, s in dataset: + t1 = time_sync() + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim + t2 = time_sync() + dt[0] += t2 - t1 + + # Inference + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred, out = model(im, augment=augment, visualize=visualize) + proto = out[1] + t3 = time_sync() + dt[1] += t3 - t2 + + # NMS + pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + dt[2] += time_sync() - t3 + + # Second-stage classifier (optional) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) + + # Process predictions + for i, det in enumerate(pred): # per image + seen += 1 + if webcam: # batch_size >= 1 + p, im0, frame = path[i], im0s[i].copy(), dataset.count + s += f'{i}: ' + else: + p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) + + p = Path(p) # to Path + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / 'labels' / 
p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt + s += '%gx%g ' % im.shape[2:] # print string + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + if len(det): + # mask stuff + masks_conf = det[:, 6:] + # binary mask, (img_h, img_w, n) + masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) + # n, img_h, img_w + masks = masks.permute(2, 0, 1).contiguous() + # bbox stuff + det = det[:, :6] # update the value in outputs, remove mask part. + # Rescale boxes from img_size to im0 size + det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() + + # Print results + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + + # plot masks + mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] + # NOTE: this way to draw masks is faster, + # but the image might get blurred, + # from https://github.com/dbolya/yolact + # image with masks, (img_h, img_w, 3) + img_masks = plot_masks(im[i], masks, mcolors) + # scale image to original hw + img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) + annotator.im = img_masks + + # Write results + for j, (*xyxy, conf, cls) in enumerate(det): + if save_txt: # Write to file + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(f'{txt_path}.txt', 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + if save_img or save_crop or view_img: # Add bbox to image + c = int(cls) # integer class + label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') + annotator.box_label(xyxy, label, color=colors(j, True)) + if save_crop: + save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) + + # Stream results + im0 = annotator.result() + if view_img: + if p not in windows: + windows.append(p) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) + cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) + cv2.imshow(str(p), im0) + cv2.waitKey(1) # 1 millisecond + + # Save results (image with detections) + if save_img: + if dataset.mode == 'image': + cv2.imwrite(save_path, im0) + else: # 'video' or 'stream' + if vid_path[i] != save_path: # new video + vid_path[i] = save_path + if isinstance(vid_writer[i], cv2.VideoWriter): + vid_writer[i].release() # release previous video writer + if vid_cap: # video + fps = vid_cap.get(cv2.CAP_PROP_FPS) + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + else: # stream + fps, w, h = 30, im0.shape[1], im0.shape[0] + save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos + vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer[i].write(im0) + + # Print time (inference-only) + LOGGER.info(f'{s}Done. 
({t3 - t2:.3f}s)') + + # Print results + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + if update: + strip_optimizer(weights) # update model (to fix SourceChangeWarning) + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') + parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', action='store_true', help='show results') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') + parser.add_argument('--nosave', action='store_true', help='do not save images/videos') + parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') + parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--visualize', action='store_true', help='visualize features') + parser.add_argument('--update', action='store_true', help='update all models') + parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') + parser.add_argument('--name', default='exp', help='save results to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') + parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') + parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(exclude=('tensorboard', 'thop')) + run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) From ad7a3430055b7e61631a80e10ab029b3f4b89a3b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:11:45 +0000 Subject: [PATCH 070/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/yolo.py | 11 ++- segment/detect.py | 2 +- segment/predict.py | 2 +- segment/train.py | 
119 +++++++++++++++--------
 segment/val.py                 |  86 ++++++++++++---------
 utils/dataloaders.py           |  21 +++---
 utils/general.py               |   6 +-
 utils/loggers/__init__.py      |   5 +-
 utils/plots.py                 |  45 ++++-------
 utils/segment/augmentations.py |   6 +-
 utils/segment/dataloaders.py   | 132 +++++++++++++++++++--------
 utils/segment/general.py       |  22 ++++--
 utils/segment/loss.py          |  79 +++++++++++++-------
 utils/segment/metrics.py       | 111 +++++++++++++++++----------
 utils/segment/plots.py         |  45 +++++------
 15 files changed, 396 insertions(+), 296 deletions(-)

diff --git a/models/yolo.py b/models/yolo.py
index c1f32d19fef9..e5065347bc70 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -89,15 +89,16 @@ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version
         anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
         return grid, anchor_grid
 
+
 class DetectSegment(Detect):
+
     def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inplace=True):
         super().__init__(nc, anchors, ch, inplace)
         self.mask_dim = mask_dim
         self.no = nc + 5 + self.mask_dim  # number of outputs per anchor
         self.nm = 5 + self.mask_dim
         self.proto_c = proto_channel
-        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1)
-                               for x in ch)  # output conv
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
 
         # P3 is used as the input
         self.proto_net = nn.Sequential(
@@ -106,7 +107,7 @@ def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inp
             # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1),
             # nn.SiLU(inplace=True),
             # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1),
-            # nn.SiLU(inplace=True), 
+            # nn.SiLU(inplace=True),
             nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
             nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1),
             nn.SiLU(inplace=True),
@@ -306,7 +307,8 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
             if hasattr(m, "mask_dim"):
-                b.data[:, 5+m.mask_dim:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
+                b.data[:, 5 + m.mask_dim:] += math.log(0.6 /
+                                                       (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
             else:
                 b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
@@ -314,6 +316,7 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
 
 Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility
 
+
 class ClassificationModel(BaseModel):
     # YOLOv5 classification model
     def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
diff --git a/segment/detect.py b/segment/detect.py
index 2eac4e46321f..24d1dd47a0f5 100644
--- a/segment/detect.py
+++ b/segment/detect.py
@@ -43,9 +43,9 @@
 from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
                            increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
+from utils.segment.general import non_max_suppression_masks, process_mask_upsample, scale_masks
 from utils.segment.plots import plot_masks
 from utils.torch_utils import select_device, time_sync
-from utils.segment.general import 
non_max_suppression_masks, scale_masks, process_mask_upsample @torch.no_grad() diff --git a/segment/predict.py b/segment/predict.py index dd45690266ec..09efa844c6df 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -43,9 +43,9 @@ from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box +from utils.segment.general import non_max_suppression_masks, process_mask_upsample, scale_masks from utils.segment.plots import plot_masks from utils.torch_utils import select_device, time_sync -from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample @torch.no_grad() diff --git a/segment/train.py b/segment/train.py index 5986aa4278ad..9771c6472f83 100644 --- a/segment/train.py +++ b/segment/train.py @@ -22,17 +22,18 @@ from datetime import datetime from pathlib import Path -import val # for end-of-epoch mAP import numpy as np import torch import torch.distributed as dist import torch.nn as nn +import torch.nn.functional as F import yaml from torch.nn.parallel import DistributedDataParallel as DDP -import torch.nn.functional as F from torch.optim import SGD, Adam, AdamW, lr_scheduler from tqdm import tqdm +import val # for end-of-epoch mAP + FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: @@ -44,7 +45,6 @@ from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -52,22 +52,25 @@ labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger from utils.loggers.wandb.wandb_utils import check_wandb_resume +from utils.plots import plot_evolve, plot_labels +from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness -from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first - LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -from utils.general import LOGGER, check_amp, check_version +from datetime import datetime + +import yaml +from torch.optim import AdamW + from utils.autobatch import check_train_batch_size +from utils.general import LOGGER, check_amp, check_version +from utils.segment.metrics import BEST_KEYS, KEYS from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -from utils.segment.metrics import KEYS, BEST_KEYS -from torch.optim import AdamW -import yaml -from datetime import datetime + def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ @@ -95,9 +98,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Loggers data_dict = None if RANK in {-1, 0}: - logger = 
GenericLogger( - opt=opt, console_logger=LOGGER - ) # loggers instance + logger = GenericLogger(opt=opt, console_logger=LOGGER) # loggers instance # Register actions # for k in methods(loggers): @@ -226,26 +227,27 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio LOGGER.info('Using SyncBatchNorm()') # Trainloader - train_loader, dataset = create_dataloader(train_path, - imgsz, - batch_size // WORLD_SIZE, - gs, - single_cls, - hyp=hyp, - augment=True, - cache=None if opt.cache == 'val' else opt.cache, - rect=opt.rect, - rank=LOCAL_RANK, - workers=workers, - image_weights=opt.image_weights, - quad=opt.quad, - prefix=colorstr('train: '), - shuffle=True, - mask_downsample_ratio=mask_ratio, - overlap_mask=overlap, - ) + train_loader, dataset = create_dataloader( + train_path, + imgsz, + batch_size // WORLD_SIZE, + gs, + single_cls, + hyp=hyp, + augment=True, + cache=None if opt.cache == 'val' else opt.cache, + rect=opt.rect, + rank=LOCAL_RANK, + workers=workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: '), + shuffle=True, + mask_downsample_ratio=mask_ratio, + overlap_mask=overlap, + ) mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class - print("mlc , nc ", mlc, " ", nc ) + print("mlc , nc ", mlc, " ", nc) nb = len(train_loader) # number of batches assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' @@ -328,11 +330,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) - LOGGER.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + LOGGER.info(("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------------- + for i, (imgs, targets, paths, _, + masks) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 @@ -358,7 +361,8 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) # loss scaled by batch_size + loss, loss_items = compute_loss(pred, targets.to(device), + masks=masks.to(device).float()) # loss scaled by batch_size if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: @@ -380,9 +384,9 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) - pbar.set_description(("%10s" * 2 + "%10.4g" * 6) - % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) - # for plots + pbar.set_description(("%10s" * 2 + "%10.4g" * 6) % + (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])) + # for plots if mask_ratio != 1: masks = F.interpolate( masks[None, :].float(), @@ -395,7 
+399,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                 if ni < 3:
                     f = save_dir / f"train_batch{ni}.jpg"  # filename
                     plot_images_and_masks(imgs, targets, masks, paths, f)
-
+
                 if ni == 10:
                     files = sorted(save_dir.glob('train*.jpg'))
                     logger.log_images(files, "Mosaics")
@@ -411,18 +415,19 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
             ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
             final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
             if not noval or final_epoch:  # Calculate mAP
-                results, maps, _ = val.run(data_dict,
-                                           batch_size=batch_size // WORLD_SIZE * 2,
-                                           imgsz=imgsz,
-                                           model=ema.ema,
-                                           single_cls=single_cls,
-                                           dataloader=val_loader,
-                                           save_dir=save_dir,
-                                           plots=plots,
-                                           #callbacks=callbacks,
-                                           compute_loss=compute_loss,
-                                           mask_downsample_ratio=mask_ratio,
-                                           overlap=overlap)
+                results, maps, _ = val.run(
+                    data_dict,
+                    batch_size=batch_size // WORLD_SIZE * 2,
+                    imgsz=imgsz,
+                    model=ema.ema,
+                    single_cls=single_cls,
+                    dataloader=val_loader,
+                    save_dir=save_dir,
+                    plots=plots,
+                    #callbacks=callbacks,
+                    compute_loss=compute_loss,
+                    mask_downsample_ratio=mask_ratio,
+                    overlap=overlap)
                 # Update best mAP
                 fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
                 stop = stopper(epoch=epoch, fitness=fi)  # early stop check
@@ -455,7 +460,6 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                         torch.save(ckpt, w / f'epoch{epoch}.pt')
                         logger.log_model(w / f'epoch{epoch}.pt')
                     del ckpt
-
             # EarlyStopping
@@ -496,9 +500,9 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
                     logger.log_metrics(metrics_dict, epoch)
             #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
         # on train end callback using genericLogger
-        logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1)
+        logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs + 1)
         if not opt.evolve:
-            logger.log_model(best, epoch+1)
+            logger.log_model(best, epoch + 1)
         if plots:
             plot_results_with_masks(file=save_dir / 'results.csv')  # save results.png
             files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
@@ -511,7 +515,6 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     return results
 
-
 def parse_opt(known=False):
     parser = argparse.ArgumentParser()
     parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
@@ -548,10 +551,12 @@ def parse_opt(known=False):
     parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
     parser.add_argument('--seed', type=int, default=0, help='Global training seed')
     parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
-
+
+    # Instance Segmentation Args
     parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to save memory')
-    parser.add_argument('--overlap-mask', action='store_true', help='Overlapping masks train faster at the cost of slight accuracy decrease')
+    parser.add_argument('--overlap-mask',
+                        action='store_true',
+                        help='Overlapping masks train faster at the cost of slight accuracy decrease')
 
     opt = parser.parse_known_args()[0] if known else parser.parse_args()
     return opt
diff --git a/segment/val.py b/segment/val.py
index a301f636fb7d..06aba5e8459a 100644
--- 
a/segment/val.py +++ b/segment/val.py @@ -34,22 +34,22 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import torch.nn.functional as F import pycocotools.mask as mask_util +import torch.nn.functional as F + from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import from utils.callbacks import Callbacks -from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, - scale_coords, xywh2xyxy, xyxy2xywh) -from utils.segment.general import (non_max_suppression_masks, process_mask_upsample, mask_iou, - scale_masks, process_mask) + coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, + xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou -from utils.segment.metrics import ap_per_class_box_and_mask, Metrics -from utils.segment.plots import plot_images_and_masks from utils.plots import output_to_target, plot_val_study -from utils.torch_utils import select_device, time_sync, de_parallel +from utils.segment.dataloaders import create_dataloader +from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks +from utils.segment.metrics import Metrics, ap_per_class_box_and_mask +from utils.segment.plots import plot_images_and_masks +from utils.torch_utils import de_parallel, select_device, time_sync def save_one_txt(predn, save_conf, shape, file): @@ -118,15 +118,20 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): gt_masks = torch.where(gt_masks == index, 1.0, 0.0) if gt_masks.shape[1:] != pred_masks.shape[1:]: - gt_masks = F.interpolate(gt_masks.unsqueeze(0), pred_masks.shape[1:], mode="bilinear", - align_corners=False, ).squeeze(0) - - iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1), ) - x = torch.where( - (iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match + gt_masks = F.interpolate( + gt_masks.unsqueeze(0), + pred_masks.shape[1:], + mode="bilinear", + align_corners=False, + ).squeeze(0) + + iou = mask_iou( + gt_masks.view(gt_masks.shape[0], -1), + pred_masks.view(pred_masks.shape[0], -1), + ) + x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: - matches = ( - torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] + matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] @@ -239,8 +244,8 @@ def run( confusion_matrix = ConfusionMatrix(nc=nc) names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", - "Mask:{P", "R", "mAP@.5", "mAP@.5:.95}") + s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", + "mAP@.5", "mAP@.5:.95}") dt = [0.0, 0.0, 0.0] metrics = Metrics() loss = torch.zeros(4, device=device) @@ -261,7 +266,7 
@@ def run( dt[0] += t2 - t1 # Inference - out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs + out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 # Loss @@ -272,7 +277,12 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() - out = non_max_suppression_masks(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + out = non_max_suppression_masks(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, mask_dim=de_parallel(model).model[-1].mask_dim) dt[2] += time_sync() - t3 @@ -296,8 +306,8 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, + 1).contiguous() if plots and batch_i < 3: # filter top 15 to plot plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) @@ -317,14 +327,15 @@ def run( correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) if plots: confusion_matrix.process_batch(predn, labelsn) - stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) + stats.append( + (correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) if save_json: - pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), - shape, shapes[si][1]) + pred_masks = scale_masks(im[si].shape[1:], + pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) @@ -337,11 +348,11 @@ def run( mode="bilinear", align_corners=False, ).squeeze(0) - plot_images_and_masks(im, targets, masks, paths, - save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels + plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', + names) # labels plot_masks = torch.cat(plot_masks, dim=0) - plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, - save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, + save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred callbacks.run('on_val_batch_end') @@ -372,7 +383,7 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - #callbacks.run('on_val_end') + #callbacks.run('on_val_end') # in case the cocoeval will update map ( @@ -404,8 +415,10 @@ def run( eval_bbox = COCOeval(anno, pred, 'bbox') eval_mask = COCOeval(anno, pred, 'segm') if is_coco: - eval_bbox.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate - eval_mask.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate + 
eval_bbox.params.imgIds = [int(Path(x).stem) + for x in dataloader.dataset.im_files] # image IDs to evaluate + eval_mask.params.imgIds = [int(Path(x).stem) + for x in dataloader.dataset.im_files] # image IDs to evaluate eval_bbox.evaluate() eval_bbox.accumulate() eval_bbox.summarize() @@ -433,8 +446,11 @@ def run( map50_mask, map_mask, ) - return ((*final_metric, *(loss.cpu() / len(dataloader)).tolist()), - metrics.get_maps(nc), t,) + return ( + (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), + metrics.get_maps(nc), + t, + ) def parse_opt(): diff --git a/utils/dataloaders.py b/utils/dataloaders.py index ca70bfcbdac7..08d8a293fc31 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -139,16 +139,17 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator, - ), dataset + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + # generator=generator, + ), dataset class InfiniteDataLoader(dataloader.DataLoader): diff --git a/utils/general.py b/utils/general.py index fcad90041fb3..3f81e8733139 100644 --- a/utils/general.py +++ b/utils/general.py @@ -25,7 +25,6 @@ from subprocess import check_output from typing import Optional from zipfile import ZipFile -from PIL import ImageFont import cv2 import numpy as np @@ -34,6 +33,7 @@ import torch import torchvision import yaml +from PIL import ImageFont from utils.downloads import gsutil_getsize from utils.metrics import box_iou, fitness @@ -465,6 +465,7 @@ def check_file(file, suffix=''): assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file + ''' def check_font(font=FONT, progress=False): # Download font to CONFIG_DIR if necessary @@ -475,6 +476,8 @@ def check_font(font=FONT, progress=False): LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=progress) ''' + + def check_font(font="Arial.ttf", size=10, progress=False): # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary font = Path(font) @@ -487,6 +490,7 @@ def check_font(font="Arial.ttf", size=10, progress=False): torch.hub.download_url_to_file(url, str(font), progress=progress) return ImageFont.truetype(str(font), size) + def check_dataset(data, autodownload=True): # Download, check and/or unzip dataset if not found locally diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 5069f2d2b16b..1bb5de45320f 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -15,7 +15,7 @@ from utils.loggers.clearml.clearml_utils import ClearmlLogger from utils.loggers.wandb.wandb_utils import WandbLogger from utils.plots import plot_images, plot_results -from utils.segment.plots import plot_results_with_masks, plot_images_and_masks +from utils.segment.plots import plot_images_and_masks, plot_results_with_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb', 
'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML @@ -295,12 +295,13 @@ def log_model(self, model_path, epoch=0, metadata={}): art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata) art.add_file(str(model_path)) wandb.log_artifact(art) - + def update_params(self, params): # Update the paramters logged if self.wandb: wandb.run.config.update(params, allow_val_change=True) + def log_tensorboard_graph(tb, model, imgsz=(640, 640)): # Log model graph to TensorBoard try: diff --git a/utils/plots.py b/utils/plots.py index 2f5741f88ae6..ed227008e113 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -527,6 +527,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, import math import os from copy import copy +from itertools import repeat from pathlib import Path import cv2 @@ -537,7 +538,6 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, import seaborn as sn import torch from PIL import Image, ImageDraw -from itertools import repeat from .metrics import fitness @@ -582,7 +582,7 @@ def __call__(self, i, bgr=False): @staticmethod def hex2rgb(h): # rgb order (PIL) - return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)) + return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) colors = Colors() # create instance for 'from utils.plots import colors' @@ -602,16 +602,12 @@ def __init__( pil=False, example="abc", ): - assert ( - im.data.contiguous - ), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." + assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." self.pil = pil or not is_ascii(example) if self.pil: # use PIL self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) - self.font = check_font( - font="Arial.Unicode.ttf", - ) + self.font = check_font(font="Arial.Unicode.ttf",) else: # use cv2 self.im = im self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width @@ -628,8 +624,7 @@ def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 2 box[0], box[1] - h if outside else box[1], box[0] + w + 1, - box[1] + 1 if outside else box[1] + h + 1, - ], + box[1] + 1 if outside else box[1] + h + 1,], fill=color, ) # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 @@ -644,9 +639,7 @@ def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 2 cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) if label: tf = max(self.lw - 1, 1) # font thickness - w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[ - 0 - ] # text width, height + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height outside = p1[1] - h - 3 >= 0 # label fits outside box p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled @@ -734,7 +727,7 @@ def plot_images( break x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin im = im.transpose(1, 2, 0) - mosaic[y : y + h, x : x + w, :] = im + mosaic[y:y + h, x:x + w, :] = im # Resize (optional) scale = max_size / ns / max(h, w) @@ -822,7 +815,7 @@ def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) ax = ax.ravel() for i in range(4): - ax[i].hist(x[i], bins=100, label="%.3g +/- %.3g" % 
(x[i].mean(), x[i].std())) + ax[i].hist(x[i], bins=100, label="{:.3g} +/- {:.3g}".format(x[i].mean(), x[i].std())) ax[i].legend() ax[i].set_title(s[i]) plt.savefig("targets.jpg", dpi=200) @@ -848,8 +841,7 @@ def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_ "mAP@.5:.95", "t_preprocess (ms/img)", "t_inference (ms/img)", - "t_NMS (ms/img)", - ] + "t_NMS (ms/img)",] for i in range(7): ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) ax[i].set_title(s[i]) @@ -947,8 +939,7 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=""): "Battery", "dt_raw (ms)", "dt_smooth (ms)", - "real-world FPS", - ] + "real-world FPS",] files = list(Path(save_dir).glob("frames*.txt")) for fi, f in enumerate(files): try: @@ -978,14 +969,12 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=""): else: a.remove() except Exception as e: - print("Warning: Plotting error for %s; %s" % (f, e)) + print("Warning: Plotting error for {}; {}".format(f, e)) ax[1].legend() plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) -def plot_evolve( - evolve_csv="path/to/evolve.csv", -): # from utils.plots import *; plot_evolve() +def plot_evolve(evolve_csv="path/to/evolve.csv",): # from utils.plots import *; plot_evolve() # Plot evolve.csv hyp evolution results evolve_csv = Path(evolve_csv) data = pd.read_csv(evolve_csv) @@ -1001,7 +990,7 @@ def plot_evolve( plt.subplot(6, 5, i + 1) plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") plt.plot(mu, f.max(), "k+", markersize=15) - plt.title("%s = %.3g" % (k, mu), fontdict={"size": 9}) # limit to 40 characters + plt.title("{} = {:.3g}".format(k, mu), fontdict={"size": 9}) # limit to 40 characters if i % 5 != 0: plt.yticks([]) print("%15s: %.3g" % (k, mu)) @@ -1056,9 +1045,7 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): import random # Plots one bounding box on image img - tl = ( - line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 - ) # line/font thickness + tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness color = color or [random.randint(0, 255) for _ in range(3)] c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) @@ -1092,9 +1079,7 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec if height > 1 and width > 1: f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename - blocks = torch.chunk( - x[0].cpu(), channels, dim=0 - ) # select batch index 0, block by channels + blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels n = min(n, channels) # number of plots fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols ax = ax.ravel() diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index dc29df6ad8ad..169addedf0f5 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -9,8 +9,9 @@ import cv2 import numpy as np -from ..general import segment2box, resample_segments from ..augmentations import box_candidates +from ..general import resample_segments, segment2box + def mixup(im, labels, segments, im2, labels2, segments2): # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf @@ -20,6 +21,7 @@ def mixup(im, labels, segments, im2, labels2, segments2): segments = np.concatenate((segments, segments2), 0) return im, labels, segments + def 
random_perspective(im, targets=(), segments=(), @@ -100,5 +102,3 @@ def random_perspective(im, new_segments = np.array(new_segments)[i] return im, targets, new_segments - - diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index f4af39617dea..f6fe642d077f 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -3,20 +3,19 @@ Dataloaders """ -import numpy as np -import cv2 -import random import os -import torch +import random -from torch.utils.data import DataLoader -from torch.utils.data import distributed +import cv2 +import numpy as np +import torch +from torch.utils.data import DataLoader, distributed from ..augmentations import augment_hsv, copy_paste, letterbox -from ..dataloaders import LoadImagesAndLabels, InfiniteDataLoader, seed_worker -from ..general import xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER +from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker +from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn from ..torch_utils import torch_distributed_zero_first -from .augmentations import random_perspective, mixup +from .augmentations import mixup, random_perspective def create_dataloader(path, @@ -35,7 +34,7 @@ def create_dataloader(path, quad=False, prefix='', shuffle=False, - mask_downsample_ratio=1, + mask_downsample_ratio=1, overlap_mask=False): if rect and shuffle: LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') @@ -64,25 +63,40 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, - worker_init_fn=seed_worker, - # generator=generator, - ), dataset + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, + worker_init_fn=seed_worker, + # generator=generator, + ), dataset class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0, prefix="", - downsample_ratio=1, overlap=False, + + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0, + prefix="", + downsample_ratio=1, + overlap=False, ): super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, - stride, pad, prefix) + stride, pad, prefix) self.downsample_ratio = downsample_ratio self.overlap = overlap @@ -99,8 +113,7 @@ def __getitem__(self, index): # MixUp augmentation if random.random() < hyp["mixup"]: - img, labels, segments = mixup(img, labels, segments, - *self.load_mosaic(random.randint(0, self.n - 1))) + img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1))) else: # Load image @@ -116,30 +129,44 @@ def __getitem__(self, index): segments = self.segments[index].copy() if len(segments): for i_s in 
range(len(segments)): - segments[i_s] = xyn2xy(segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) + segments[i_s] = xyn2xy( + segments[i_s], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1], + ) if labels.size: # normalized xywh to pixel xyxy format labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: - img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], - translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"], - return_seg=True, ) + img, labels, segments = random_perspective( + img, + labels, + segments=segments, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"], + return_seg=True, + ) nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) if self.overlap: - masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, - downsample_ratio=self.downsample_ratio) + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], + segments, + downsample_ratio=self.downsample_ratio) masks = masks[None] # (640, 640) -> (1, 640, 640) labels = labels[sorted_idx] else: masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) - masks = (torch.from_numpy(masks) if len(masks) else - torch.zeros(1 if self.overlap else nl, - img.shape[0] // self.downsample_ratio, - img.shape[1] // self.downsample_ratio)) + masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // + self.downsample_ratio, img.shape[1] // + self.downsample_ratio)) # TODO: albumentations support if self.augment: # Albumentations @@ -174,14 +201,14 @@ def __getitem__(self, index): # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB img = np.ascontiguousarray(img) - + return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) def load_mosaic(self, index): # YOLOv5 4-mosaic loader. 
Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y # 3 additional image indices indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices @@ -224,16 +251,15 @@ def load_mosaic(self, index): # Augment img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) - img4, labels4, segments4 = random_perspective( - img4, - labels4, - segments4, - degrees=self.hyp["degrees"], - translate=self.hyp["translate"], - scale=self.hyp["scale"], - shear=self.hyp["shear"], - perspective=self.hyp["perspective"], - border=self.mosaic_border) # border to remove + img4, labels4, segments4 = random_perspective(img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border) # border to remove return img4, labels4, segments4 @staticmethod @@ -259,7 +285,7 @@ def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): polygons = polygons.reshape(shape[0], -1, 2) cv2.fillPoly(mask, polygons, color=color) nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) - # NOTE: fillPoly firstly then resize is trying the keep the same way + # NOTE: fillPoly first and then resize tries to keep the same way # of loss calculation when mask-ratio=1. mask = cv2.resize(mask, (nw, nh)) return mask @@ -269,22 +295,20 @@ def polygons2masks(img_size, polygons, color, downsample_ratio=1): """ Args: img_size (tuple): The image size. - polygons (list[np.ndarray]): each polygon is [N, M], + polygons (list[np.ndarray]): each polygon is [N, M], N is the number of polygons, M is the number of points(Be divided by 2).
""" masks = [] for si in range(len(polygons)): - mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, - downsample_ratio) + mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) masks.append(mask) return np.array(masks) def polygons2masks_overlap(img_size, segments, downsample_ratio=1): """Return a (640, 640) overlap mask.""" - masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), - dtype=np.uint8) + masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), dtype=np.uint8) areas = [] ms = [] for si in range(len(segments)): diff --git a/utils/segment/general.py b/utils/segment/general.py index 00367e7268fd..675fac4fbd92 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -10,8 +10,17 @@ from ..metrics import box_iou -def non_max_suppression_masks(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, - multi_label=False, labels=(), max_det=300, mask_dim=32, ): +def non_max_suppression_masks( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + mask_dim=32, +): """Runs Non-Maximum Suppression (NMS) on inference results Returns: @@ -119,7 +128,10 @@ def crop(masks, boxes): """ h, w, n = masks.size() x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = (boxes[:, 1], boxes[:, 3],) + y1, y2 = ( + boxes[:, 1], + boxes[:, 3], + ) rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) @@ -226,7 +238,7 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): def mask_iou(mask1, mask2): """ - mask1: [N, n] m1 means number of predicted objects + mask1: [N, n] m1 means number of predicted objects mask2: [M, n] m2 means number of gt objects Note: n means image_w x image_h @@ -244,7 +256,7 @@ def mask_iou(mask1, mask2): def masks_iou(mask1, mask2): """ - mask1: [N, n] m1 means number of predicted objects + mask1: [N, n] m1 means number of predicted objects mask2: [N, n] m2 means number of gt objects Note: n means image_w x image_h diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 992fe98499ff..d1027a387f7e 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -3,12 +3,14 @@ import torch.nn.functional as F from ..general import xywh2xyxy -from ..loss import smooth_BCE, FocalLoss -from ..torch_utils import is_parallel +from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou -from .general import masks_iou, crop +from ..torch_utils import is_parallel +from .general import crop, masks_iou + class MaskIOULoss(nn.Module): + def __init__(self) -> None: super().__init__() @@ -29,6 +31,7 @@ def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): iou = masks_iou(pred_mask, gt_mask) return iou if return_iou else (1.0 - iou) + class ComputeLoss: # Compute losses def __init__(self, model, autobalance=False, overlap=False): @@ -54,7 +57,13 @@ def __init__(self, model, autobalance=False, overlap=False): det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = (BCEcls, BCEobj, 1.0, h, autobalance,) + self.BCEcls, self.BCEobj, self.gr, self.hyp, 
self.autobalance = ( + BCEcls, + BCEobj, + 1.0, + h, + autobalance, + ) for k in "na", "nc", "nl", "anchors", "nm": if hasattr(det, k): setattr(self, k, getattr(det, k)) @@ -68,8 +77,11 @@ def __call__(self, preds, targets, masks): # predictions, targets, model device = targets.device lcls, lbox, lobj, lseg = ( - torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device), - torch.zeros(1, device=device),) + torch.zeros(1, device=device), + torch.zeros(1, device=device), + torch.zeros(1, device=device), + torch.zeros(1, device=device), + ) tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -91,7 +103,13 @@ def __call__(self, preds, targets, masks): # predictions, targets, model score_iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = (b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id],) + b, a, gj, gi, score_iou = ( + b[sort_id], + a[sort_id], + gj[sort_id], + gi[sort_id], + score_iou[sort_id], + ) tobj[b, a, gj, gi] = 1.0 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio # Classification @@ -103,14 +121,15 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask Regression # TODO: # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), mode="bilinear", - align_corners=False).squeeze(0) + downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), + mode="bilinear", + align_corners=False).squeeze(0) mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * torch.tensor( - [mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) + mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * + torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) batch_lseg = torch.zeros(1, device=device) @@ -128,7 +147,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] - psi = ps[index][:, 5: self.nm] + psi = ps[index][:, 5:self.nm] proto = proto_out[bi] one_lseg = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) @@ -165,15 +184,15 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean()#, iou# + lseg_iou.mean() + return lseg.mean() #, iou# + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] gain = torch.ones(8, device=targets.device) # normalized to gridspace gain - ai = ( - torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)) # same as .repeat_interleave(nt) + ai = (torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, + nt)) # same as .repeat_interleave(nt) if self.overlap: batch = p[0].shape[0] ti = [] @@ -181,27 +200,33 @@ def build_targets(self, p, targets): # find number of targets of each image num = (targets[:, 0] == i).sum() # (na, num) - ti.append( - torch.arange(num, device=targets.device) 
- .float() - .view(1, num) - .repeat(na, 1) + 1) + ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) # (na, nt) ti = torch.cat(ti, 1) else: - ti = ( - torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1)) # same as .repeat_interleave(nt) + ti = (torch.arange(nt, device=targets.device).float().view(1, + nt).repeat(na, + 1)) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices g = 0.5 # bias - off = (torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device, ).float() * g) # offsets + off = ( + torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device, + ).float() * g) # offsets for i in range(self.nl): anchors, shape = self.anchors[i], p[i].shape - gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain @@ -234,7 +259,7 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices tidx = t[:, 7].long() - indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid + indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index 65e3011f9f12..981d90252ec9 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -5,6 +5,7 @@ import numpy as np from easydict import EasyDict as edict + from ..metrics import ap_per_class @@ -14,26 +15,57 @@ def fitness(x): return (x[:, :8] * w).sum(1) -def ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls, plot=False, save_dir=".", names=(), ): +def ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): """ Args: tp_b: tp of boxes. tp_m: tp of masks. other arguments see `func: ap_per_class`. 
""" - results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Box")[2:] - results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, - prefix="Mask")[2:] + results_boxes = ap_per_class(tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box")[2:] + results_masks = ap_per_class(tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask")[2:] results = edict({ - "boxes": {"p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[3], "f1": results_boxes[2], + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[3], + "f1": results_boxes[2], "ap_class": results_boxes[4]}, - "masks": {"p": results_masks[0], "r": results_masks[1], "ap": results_masks[3], "f1": results_masks[2], + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[3], + "f1": results_masks[2], "ap_class": results_masks[4]}}) return results + class Metric: + def __init__(self) -> None: self.p = [] # (nc, ) self.r = [] # (nc, ) @@ -145,36 +177,35 @@ def ap_class_index(self): # boxes and masks have the same ap_class_index return self.metric_box.ap_class_index -KEYS = [ - "train/box_loss", - "train/seg_loss", # train loss - "train/obj_loss", - "train/cls_loss", - "metrics/precision(B)", - "metrics/recall(B)", - "metrics/mAP_0.5(B)", - "metrics/mAP_0.5:0.95(B)", # metrics - "metrics/precision(M)", - "metrics/recall(M)", - "metrics/mAP_0.5(M)", - "metrics/mAP_0.5:0.95(M)", # metrics - "val/box_loss", - "val/seg_loss", # val loss - "val/obj_loss", - "val/cls_loss", - "x/lr0", - "x/lr1", - "x/lr2", - ] - -BEST_KEYS = [ - "best/epoch", - "best/precision(B)", - "best/recall(B)", - "best/mAP_0.5(B)", - "best/mAP_0.5:0.95(B)", - "best/precision(M)", - "best/recall(M)", - "best/mAP_0.5(M)", - "best/mAP_0.5:0.95(M)", - ] \ No newline at end of file + +KEYS = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2",] + +BEST_KEYS = [ + "best/epoch", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)",] diff --git a/utils/segment/plots.py b/utils/segment/plots.py index eb1e9b61d01a..b0774213ede0 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -1,14 +1,15 @@ -import cv2 -import torch import math -import numpy as np +from pathlib import Path + +import cv2 import matplotlib.pyplot as plt +import numpy as np import pandas as pd -from pathlib import Path +import torch from PIL import Image -from ..plots import colors, Annotator from ..general import xywh2xyxy +from ..plots import Annotator, colors def plot_masks(img, masks, colors, alpha=0.5): @@ -37,7 +38,7 @@ def plot_masks(img, masks, colors, alpha=0.5): inv_alph_masks = masks * (-alpha) + 1 masks_color_summand = masks_color[0] if num_masks > 1: - inv_alph_cumul = inv_alph_masks[: (num_masks - 1)].cumprod(dim=0) + inv_alph_cumul = inv_alph_masks[:(num_masks - 1)].cumprod(dim=0) masks_color_cumul = masks_color[1:] * 
inv_alph_cumul masks_color_summand += masks_color_cumul.sum(dim=0) @@ -48,13 +49,12 @@ def plot_masks(img, masks, colors, alpha=0.5): img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand return (img_gpu * 255).byte().cpu().numpy() + def plot_one_box(x, img, color=None, label=None, line_thickness=None): import random # Plots one bounding box on image img - tl = ( - line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 - ) # line/font thickness + tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness color = color or [random.randint(0, 255) for _ in range(3)] c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) @@ -74,6 +74,7 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): lineType=cv2.LINE_AA, ) + def plot_images_and_masks( images, targets, @@ -120,7 +121,7 @@ def plot_images_and_masks( if scale_factor < 1: img = cv2.resize(img, (w, h)) - mosaic[block_y : block_y + h, block_x : block_x + w, :] = img + mosaic[block_y:block_y + h, block_x:block_x + w, :] = img if len(targets) > 0: idx = (targets[:, 0]).astype(int) image_targets = targets[idx == i] @@ -138,9 +139,7 @@ def plot_images_and_masks( boxes = xywh2xyxy(image_targets[:, 2:6]).T classes = image_targets[:, 1].astype("int") labels = image_targets.shape[1] == 6 # labels if no conf column - conf = ( - None if labels else image_targets[:, 6] - ) # check for confidence presence (label vs pred) + conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 @@ -161,11 +160,11 @@ def plot_images_and_masks( else: mask = image_masks[j].astype(np.bool) if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "%s %.1f" % (cls, conf[j]) + label = "%s" % cls if labels else "{} {:.1f}".format(cls, conf[j]) plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y : block_y + h, block_x : block_x + w, :][mask] = mosaic[ - block_y : block_y + h, block_x : block_x + w, : - ][mask] * 0.35 + (np.array(color) * 0.65) + mosaic[block_y:block_y + h, block_x:block_x + + w, :][mask] = mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + ( + np.array(color) * 0.65) # Draw image filename labels if paths: @@ -193,9 +192,7 @@ def plot_images_and_masks( if fname: r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize( - mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA - ) + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save with Image.fromarray(mosaic) as im: im.save(fname) @@ -213,11 +210,8 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): try: data = pd.read_csv(f) index = np.argmax( - 0.9 * data.values[:, 8] - + 0.1 * data.values[:, 7] - + 0.9 * data.values[:, 12] - + 0.1 * data.values[:, 11], - ) + 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): @@ -246,4 +240,3 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[1].legend() fig.savefig(save_dir / "results.png", dpi=200) plt.close() 
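The mask drawing in utils/segment/plots.py above composites all instance masks in one vectorized pass rather than blending them one at a time: each mask contributes alpha * color on its own pixels and multiplies everything beneath it by (1 - alpha), so the whole stack reduces to a cumulative product of the (1 - alpha) layers. A minimal, self-contained sketch of that identity follows; composite_masks, its shapes, and the argument layout are illustrative assumptions, not the shipped function:

    import torch

    def composite_masks(img, masks, colors, alpha=0.5):
        """Blend n instance masks into an image in a single pass (sketch).

        img: (3, h, w) float tensor in [0, 1]
        masks: (n, h, w) binary float tensor
        colors: (n, 3) float tensor in [0, 1]
        """
        a = masks[:, None] * alpha     # (n, 1, h, w) per-pixel alpha of each layer
        c = colors[:, :, None, None]   # (n, 3, 1, 1) per-layer colour
        inv = 1.0 - a                  # transmittance of each layer
        # layer k is attenuated by the transmittance of the layers drawn before it
        vis = torch.cat([torch.ones_like(inv[:1]), inv[:-1]]).cumprod(0)
        return img * inv.prod(0) + (a * c * vis).sum(0)

This cumulative product is the same trick the hunk above keeps as inv_alph_masks[:(num_masks - 1)].cumprod(dim=0); it avoids a Python loop over masks, which is why the code credits the faster YOLACT-style drawing.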
- From 9f7633f5ce267a1662b87d7d34b9e448584cc4d4 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 23:03:12 +0530 Subject: [PATCH 071/247] Update coco.yaml --- data/coco.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco.yaml b/data/coco.yaml index 37d30d63f7f0..d64dfc7fed76 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -8,7 +8,7 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: datasets/coco # dataset root dir +path: ../datasets/coco # dataset root dir train: train2017.txt # train images (relative to 'path') 118287 images val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 From 1ed6e1a4b3a04a40e89e8fd2832e08fdbbd1f485 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:33:34 +0000 Subject: [PATCH 072/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/plots.py | 6 +++--- utils/segment/plots.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index ed227008e113..f23876e0a170 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -815,7 +815,7 @@ def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) ax = ax.ravel() for i in range(4): - ax[i].hist(x[i], bins=100, label="{:.3g} +/- {:.3g}".format(x[i].mean(), x[i].std())) + ax[i].hist(x[i], bins=100, label=f"{x[i].mean():.3g} +/- {x[i].std():.3g}") ax[i].legend() ax[i].set_title(s[i]) plt.savefig("targets.jpg", dpi=200) @@ -969,7 +969,7 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=""): else: a.remove() except Exception as e: - print("Warning: Plotting error for {}; {}".format(f, e)) + print(f"Warning: Plotting error for {f}; {e}") ax[1].legend() plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) @@ -990,7 +990,7 @@ def plot_evolve(evolve_csv="path/to/evolve.csv",): # from utils.plots import *; plt.subplot(6, 5, i + 1) plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") plt.plot(mu, f.max(), "k+", markersize=15) - plt.title("{} = {:.3g}".format(k, mu), fontdict={"size": 9}) # limit to 40 characters + plt.title(f"{k} = {mu:.3g}", fontdict={"size": 9}) # limit to 40 characters if i % 5 != 0: plt.yticks([]) print("%15s: %.3g" % (k, mu)) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index b0774213ede0..8974fdfe1274 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -160,7 +160,7 @@ def plot_images_and_masks( else: mask = image_masks[j].astype(np.bool) if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else "{} {:.1f}".format(cls, conf[j]) + label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + ( From e040d5c09beee9873d35255b2059a02c95343695 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 23:13:23 +0530 Subject: [PATCH 073/247] cleanup --- segment/detect.py | 279 ---------------------------------------------- segment/train.py | 35 ++---- segment/val.py | 8 -- 3 files changed, 8 
insertions(+), 314 deletions(-) delete mode 100644 segment/detect.py diff --git a/segment/detect.py b/segment/detect.py deleted file mode 100644 index 2eac4e46321f..000000000000 --- a/segment/detect.py +++ /dev/null @@ -1,279 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Run inference on images, videos, directories, streams, etc. - -Usage - sources: - $ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam - img.jpg # image - vid.mp4 # video - path/ # directory - path/*.jpg # glob - 'https://youtu.be/Zgi9g1ksQHc' # YouTube - 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream - -Usage - formats: - $ python path/to/detect.py --weights yolov5s.pt # PyTorch - yolov5s.torchscript # TorchScript - yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn - yolov5s.xml # OpenVINO - yolov5s.engine # TensorRT - yolov5s.mlmodel # CoreML (macOS-only) - yolov5s_saved_model # TensorFlow SavedModel - yolov5s.pb # TensorFlow GraphDef - yolov5s.tflite # TensorFlow Lite - yolov5s_edgetpu.tflite # TensorFlow Edge TPU -""" - -import argparse -import os -import sys -from pathlib import Path - -import torch -import torch.backends.cudnn as cudnn - -FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] # YOLOv5 root directory -if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative - -from models.experimental import attempt_load -from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) -from utils.plots import Annotator, colors, save_one_box -from utils.segment.plots import plot_masks -from utils.torch_utils import select_device, time_sync -from utils.segment.general import non_max_suppression_masks, scale_masks, process_mask_upsample - - -@torch.no_grad() -def run( - weights=ROOT / 'yolov5s.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/detect', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference -): - source = str(source) - save_img = not nosave and not source.endswith('.txt') # save inference images - is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) - is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) - webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) - if is_url and is_file: - source = check_file(source) # download - - # Directories - save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir - - # Load model - device = select_device(device) - model = attempt_load(weights, device=device, inplace=True, fuse=True) - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, 'module') else model.names # get class names - model.half() if half else model.float() - pt = True - imgsz = check_img_size(imgsz, s=stride) # check image size - - # Dataloader - if webcam: - view_img = check_imshow() - cudnn.benchmark = True # set True to speed up constant image size inference - dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) - bs = len(dataset) # batch_size - else: - dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) - bs = 1 # batch_size - vid_path, vid_writer = [None] * bs, [None] * bs - - # Run inference - if str(device) != "cpu": - im = torch.zeros(1, 3, *imgsz).to(device).half() # input image - model(im) # warmup - seen, windows, dt = 0, [], [0.0, 0.0, 0.0] - for path, im, im0s, vid_cap, s in dataset: - t1 = time_sync() - im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - if len(im.shape) == 3: - im = im[None] # expand for batch dim - t2 = time_sync() - dt[0] += t2 - t1 - - # Inference - visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred, out = model(im, augment=augment, visualize=visualize) - proto = out[1] - t3 = time_sync() - dt[1] += t3 - t2 - - # NMS - pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) - dt[2] += time_sync() - t3 - - # Second-stage classifier (optional) - # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) - - # Process predictions - for i, det in enumerate(pred): # per image - seen += 1 - if webcam: # batch_size >= 1 - p, im0, frame = path[i], im0s[i].copy(), dataset.count - s += f'{i}: ' - else: - p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) - - p = Path(p) # to Path - save_path = str(save_dir / p.name) # im.jpg - txt_path = str(save_dir / 'labels' / p.stem) + ('' 
if dataset.mode == 'image' else f'_{frame}') # im.txt - s += '%gx%g ' % im.shape[2:] # print string - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh - imc = im0.copy() if save_crop else im0 # for save_crop - annotator = Annotator(im0, line_width=line_thickness, example=str(names)) - if len(det): - # mask stuff - masks_conf = det[:, 6:] - # binary mask, (img_h, img_w, n) - masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) - # n, img_h, img_w - masks = masks.permute(2, 0, 1).contiguous() - # bbox stuff - det = det[:, :6] # update the value in outputs, remove mask part. - # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() - - # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class - s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - - # plot masks - mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] - # NOTE: this way to draw masks is faster, - # but the image might get blurred, - # from https://github.com/dbolya/yolact - # image with masks, (img_h, img_w, 3) - img_masks = plot_masks(im[i], masks, mcolors) - # scale image to original hw - img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) - annotator.im = img_masks - - # Write results - for j, (*xyxy, conf, cls) in enumerate(det): - if save_txt: # Write to file - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(f'{txt_path}.txt', 'a') as f: - f.write(('%g ' * len(line)).rstrip() % line + '\n') - - if save_img or save_crop or view_img: # Add bbox to image - c = int(cls) # integer class - label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(j, True)) - if save_crop: - save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) - - # Stream results - im0 = annotator.result() - if view_img: - if p not in windows: - windows.append(p) - cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) - cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) - cv2.imshow(str(p), im0) - cv2.waitKey(1) # 1 millisecond - - # Save results (image with detections) - if save_img: - if dataset.mode == 'image': - cv2.imwrite(save_path, im0) - else: # 'video' or 'stream' - if vid_path[i] != save_path: # new video - vid_path[i] = save_path - if isinstance(vid_writer[i], cv2.VideoWriter): - vid_writer[i].release() # release previous video writer - if vid_cap: # video - fps = vid_cap.get(cv2.CAP_PROP_FPS) - w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - else: # stream - fps, w, h = 30, im0.shape[1], im0.shape[0] - save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos - vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) - vid_writer[i].write(im0) - - # Print time (inference-only) - LOGGER.info(f'{s}Done. 
({t3 - t2:.3f}s)') - - # Print results - t = tuple(x / seen * 1E3 for x in dt) # speeds per image - LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) - if save_txt or save_img: - s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") - if update: - strip_optimizer(weights) # update model (to fix SourceChangeWarning) - - -def parse_opt(): - parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') - parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') - parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') - parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') - parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') - parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--view-img', action='store_true', help='show results') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') - parser.add_argument('--nosave', action='store_true', help='do not save images/videos') - parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') - parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--visualize', action='store_true', help='visualize features') - parser.add_argument('--update', action='store_true', help='update all models') - parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') - parser.add_argument('--name', default='exp', help='save results to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') - parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') - parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') - parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') - opt = parser.parse_args() - opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand - print_args(vars(opt)) - return opt - - -def main(opt): - check_requirements(exclude=('tensorboard', 'thop')) - run(**vars(opt)) - - -if __name__ == "__main__": - opt = parse_opt() - main(opt) diff --git a/segment/train.py b/segment/train.py index 5986aa4278ad..fc8753f14c65 100644 --- a/segment/train.py +++ b/segment/train.py @@ -43,15 +43,13 @@ from models.yolo import Model from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size -from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.downloads import 
attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, - labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) + labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger -from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness from utils.plots import plot_evolve, plot_labels @@ -69,7 +67,7 @@ import yaml from datetime import datetime -def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary +def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio @@ -97,11 +95,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: logger = GenericLogger( opt=opt, console_logger=LOGGER - ) # loggers instance - - # Register actions - # for k in methods(loggers): - # callbacks.register_action(k, callback=getattr(loggers, k)) + ) # Config plots = not evolve and not opt.noplots # create plots @@ -166,8 +160,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) g[0].append(v.weight) - # hyp['lr0'] = hyp['lr0'] / batch_size * 128 - # hyp['warmup_bias_lr'] = 0.01 if opt.optimizer == 'Adam': optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum elif opt.optimizer == 'AdamW': @@ -384,13 +376,8 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0],imgs.shape[-1])) # for plots if mask_ratio != 1: - masks = F.interpolate( - masks[None, :].float(), - (imgsz, imgsz), - mode="bilinear", - align_corners=False, + masks = F.interpolate(masks[None, :].float(), (imgsz, imgsz), mode="bilinear", align_corners=False, ).squeeze(0) - #callbacks.run('on_train_batch_end', ni, model, imgs, targets, masks, paths, plots) if plots: if ni < 3: f = save_dir / f"train_batch{ni}.jpg" # filename @@ -407,7 +394,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if RANK in {-1, 0}: # mAP - # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP @@ -419,7 +405,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio dataloader=val_loader, save_dir=save_dir, plots=plots, - #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) @@ -487,14 +472,12 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio save_json=is_coco, verbose=True, plots=plots, - #callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if 
is_coco: metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) logger.log_metrics(metrics_dict, epoch) - #callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs+1) if not opt.evolve: @@ -505,7 +488,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") logger.log_images(files, "Results") - # callbacks.run('on_train_end', last, best, plots, epoch, results) torch.cuda.empty_cache() return results @@ -557,7 +539,7 @@ def parse_opt(known=False): return opt -def main(opt, callbacks=Callbacks()): +def main(opt): # Checks if RANK in {-1, 0}: print_args(vars(opt)) @@ -565,7 +547,7 @@ def main(opt, callbacks=Callbacks()): check_requirements(exclude=['thop']) # Resume - if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run + if opt.resume and not opt.evolve: # resume an interrupted run ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: @@ -599,7 +581,7 @@ def main(opt, callbacks=Callbacks()): # Train if not opt.evolve: - train(opt.hyp, opt, device, callbacks) + train(opt.hyp, opt, device) if WORLD_SIZE > 1 and RANK == 0: LOGGER.info('Destroying process group... ') dist.destroy_process_group() @@ -681,8 +663,7 @@ def main(opt, callbacks=Callbacks()): hyp[k] = round(hyp[k], 5) # significant digits # Train mutation - results = train(hyp.copy(), opt, device, callbacks) - callbacks = Callbacks() + results = train(hyp.copy(), opt, device) # Write mutation results print_mutation(results, hyp.copy(), save_dir, opt.bucket) diff --git a/segment/val.py b/segment/val.py index a301f636fb7d..11e83f3aaec7 100644 --- a/segment/val.py +++ b/segment/val.py @@ -38,7 +38,6 @@ import pycocotools.mask as mask_util from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import -from utils.callbacks import Callbacks from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, @@ -166,7 +165,6 @@ def run( plots=True, overlap=False, mask_downsample_ratio=1, - callbacks=Callbacks(), compute_loss=None, ): process = process_mask_upsample if plots else process_mask @@ -245,10 +243,8 @@ def run( metrics = Metrics() loss = torch.zeros(4, device=device) jdict, stats = [], [] - callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): - callbacks.run('on_val_batch_start') t1 = time_sync() if cuda: im = im.to(device, non_blocking=True) @@ -326,7 +322,6 @@ def run( pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred[:, :6], predn[:, :6], path, names, im[si]) # Plot images if plots and batch_i < 3: @@ -343,8 +338,6 @@ def run( 
plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred - callbacks.run('on_val_batch_end') - # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): @@ -372,7 +365,6 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) - #callbacks.run('on_val_end') # in case the cocoeval will update map ( From c7756b00cacd64c11637eae938f11848809038ca Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:47:57 +0000 Subject: [PATCH 074/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 14 ++++++++------ segment/val.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/segment/train.py b/segment/train.py index 7c3faad86900..cd48e909a9f3 100644 --- a/segment/train.py +++ b/segment/train.py @@ -44,7 +44,6 @@ from models.yolo import Model from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size -from utils.segment.dataloaders import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, @@ -52,6 +51,7 @@ labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger from utils.plots import plot_evolve, plot_labels +from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss from utils.segment.metrics import fitness from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first @@ -96,9 +96,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Loggers data_dict = None if RANK in {-1, 0}: - logger = GenericLogger( - opt=opt, console_logger=LOGGER - ) + logger = GenericLogger(opt=opt, console_logger=LOGGER) # Config plots = not evolve and not opt.noplots # create plots @@ -382,7 +380,11 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])) # for plots if mask_ratio != 1: - masks = F.interpolate(masks[None, :].float(), (imgsz, imgsz), mode="bilinear", align_corners=False, + masks = F.interpolate( + masks[None, :].float(), + (imgsz, imgsz), + mode="bilinear", + align_corners=False, ).squeeze(0) if plots: if ni < 3: @@ -411,7 +413,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary dataloader=val_loader, save_dir=save_dir, plots=plots, - compute_loss=compute_loss, + compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # Update best mAP diff --git a/segment/val.py b/segment/val.py index f86893b3d95a..2cd0c36264ef 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,12 +39,12 @@ from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import -from utils.segment.dataloaders import create_dataloader from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, 
box_iou from utils.plots import output_to_target, plot_val_study +from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks From 8643c17c11161391ba25392b79e76dcaa57cef49 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 20 Aug 2022 19:56:38 +0200 Subject: [PATCH 075/247] Fix duplicate plots.py --- utils/plots.py | 574 ------------------------------------------------- 1 file changed, 574 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index f23876e0a170..7417308c4d82 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -517,577 +517,3 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, # cv2.imwrite(f, crop) # save BGR, https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0) # save RGB return crop - - -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Plotting utils -""" - -import math -import os -from copy import copy -from itertools import repeat -from pathlib import Path - -import cv2 -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sn -import torch -from PIL import Image, ImageDraw - -from .metrics import fitness - -# Settings -RANK = int(os.getenv("RANK", -1)) -matplotlib.rc("font", **{"size": 11}) -matplotlib.use("Agg") # for writing to files only - - -class Colors: - # Ultralytics color palette https://ultralytics.com/ - def __init__(self): - # hex = matplotlib.colors.TABLEAU_COLORS.values() - hex = ( - "FF3838", - "FF9D97", - "FF701F", - "FFB21D", - "CFD231", - "48F90A", - "92CC17", - "3DDB86", - "1A9334", - "00D4BB", - "2C99A8", - "00C2FF", - "344593", - "6473FF", - "0018EC", - "8438FF", - "520085", - "CB38FF", - "FF95C8", - "FF37C7", - ) - self.palette = [self.hex2rgb("#" + c) for c in hex] - self.n = len(self.palette) - - def __call__(self, i, bgr=False): - c = self.palette[int(i) % self.n] - return (c[2], c[1], c[0]) if bgr else c - - @staticmethod - def hex2rgb(h): # rgb order (PIL) - return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) - - -colors = Colors() # create instance for 'from utils.plots import colors' - - -class Annotator: - if RANK in (-1, 0): - check_font() # download TTF if necessary - - # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations - def __init__( - self, - im, - line_width=None, - font_size=None, - font="Arial.ttf", - pil=False, - example="abc", - ): - assert (im.data.contiguous), "Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images." 
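# (A minimal usage sketch for the Colors palette shown above; this deleted block
# duplicates the copy kept earlier in utils/plots.py, per this commit's subject.)
# colors = Colors()      # 20 hex entries
# colors(0)              # -> (255, 56, 56), i.e. '#FF3838' parsed by hex2rgb
# colors(20)             # -> (255, 56, 56) again: int(i) % n wraps the palette
# colors(0, bgr=True)    # -> (56, 56, 255), channel order for cv2 drawing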
- self.pil = pil or not is_ascii(example) - if self.pil: # use PIL - self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) - self.draw = ImageDraw.Draw(self.im) - self.font = check_font(font="Arial.Unicode.ttf",) - else: # use cv2 - self.im = im - self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width - - def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)): - # Add one xyxy box to image with label - if self.pil or not is_ascii(label): - self.draw.rectangle(box, width=self.lw, outline=color) # box - if label: - w, h = self.font.getsize(label) # text width, height - outside = box[1] - h >= 0 # label fits outside box - self.draw.rectangle( - [ - box[0], - box[1] - h if outside else box[1], - box[0] + w + 1, - box[1] + 1 if outside else box[1] + h + 1,], - fill=color, - ) - # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 - self.draw.text( - (box[0], box[1] - h if outside else box[1]), - label, - fill=txt_color, - font=self.font, - ) - else: # cv2 - p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) - cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) - if label: - tf = max(self.lw - 1, 1) # font thickness - w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height - outside = p1[1] - h - 3 >= 0 # label fits outside box - p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 - cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled - cv2.putText( - self.im, - label, - (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), - 0, - self.lw / 3, - txt_color, - thickness=tf, - lineType=cv2.LINE_AA, - ) - - def rectangle(self, xy, fill=None, outline=None, width=1): - # Add rectangle to image (PIL-only) - self.draw.rectangle(xy, fill, outline, width) - - def text(self, xy, text, txt_color=(255, 255, 255)): - # Add text to image (PIL-only) - w, h = self.font.getsize(text) # text width, height - self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) - - def result(self): - # Return annotated image as array - return np.asarray(self.im) - - -def hist2d(x, y, n=100): - # 2d histogram used in labels.png and evolve.png - xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) - hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) - xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) - yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) - return np.log(hist[xidx, yidx]) - - -def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): - from scipy.signal import butter, filtfilt - - # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy - def butter_lowpass(cutoff, fs, order): - nyq = 0.5 * fs - normal_cutoff = cutoff / nyq - return butter(order, normal_cutoff, btype="low", analog=False) - - b, a = butter_lowpass(cutoff, fs, order=order) - return filtfilt(b, a, data) # forward-backward filter - - -def output_to_target(output, filter_dets=10): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] - targets = [] - for i, o in enumerate(output): - o = o[:filter_dets] - for *box, conf, cls in o.cpu().numpy()[:, :6]: - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) - return np.array(targets) - - -def plot_images( - images, - targets, - paths=None, - fname="images.jpg", - names=None, - max_size=1920, - max_subplots=16, -): - # Plot image 
grid with labels - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - if np.max(images[0]) <= 1: - images *= 255.0 # de-normalise (optional) - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Build Image - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, im in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - im = im.transpose(1, 2, 0) - mosaic[y:y + h, x:x + w, :] = im - - # Resize (optional) - scale = max_size / ns / max(h, w) - if scale < 1: - h = math.ceil(scale * h) - w = math.ceil(scale * w) - mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) - - # Annotate - fs = int((h + w) * ns * 0.01) # font size - annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) - for i in range(i + 1): - x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin - annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders - if paths: - annotator.text( - (x + 5, y + 5 + h), - text=Path(paths[i]).name[:40], - txt_color=(220, 220, 220), - ) # filenames - if len(targets) > 0: - ti = targets[targets[:, 0] == i] # image targets - boxes = xywh2xyxy(ti[:, 2:6]).T - classes = ti[:, 1].astype("int") - labels = ti.shape[1] == 6 # labels if no conf column - conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) - - if boxes.shape[1]: - if boxes.max() <= 1.01: # if normalized with tolerance 0.01 - boxes[[0, 2]] *= w # scale to pixels - boxes[[1, 3]] *= h - elif scale < 1: # absolute coords need scale if image scales - boxes *= scale - boxes[[0, 2]] += x - boxes[[1, 3]] += y - for j, box in enumerate(boxes.T.tolist()): - cls = classes[j] - color = colors(cls) - cls = names[cls] if names else cls - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}" - annotator.box_label(box, label, color=color) - annotator.im.save(fname) # save - return annotator.result() - - -def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): - # Plot LR simulating training for full epochs - optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals - y = [] - for _ in range(epochs): - scheduler.step() - y.append(optimizer.param_groups[0]["lr"]) - plt.plot(y, ".-", label="LR") - plt.xlabel("epoch") - plt.ylabel("LR") - plt.grid() - plt.xlim(0, epochs) - plt.ylim(0) - plt.savefig(Path(save_dir) / "LR.png", dpi=200) - plt.close() - - -def plot_val_txt(): # from utils.plots import *; plot_val() - # Plot val.txt histograms - x = np.loadtxt("val.txt", dtype=np.float32) - box = xyxy2xywh(x[:, :4]) - cx, cy = box[:, 0], box[:, 1] - - fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) - ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) - ax.set_aspect("equal") - plt.savefig("hist2d.png", dpi=300) - - fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) - ax[0].hist(cx, bins=600) - ax[1].hist(cy, bins=600) - plt.savefig("hist1d.png", dpi=200) - - -def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() - # Plot targets.txt histograms - x = np.loadtxt("targets.txt", dtype=np.float32).T - s = ["x targets", "y targets", "width targets", "height targets"] - fig, ax = 
plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) - ax = ax.ravel() - for i in range(4): - ax[i].hist(x[i], bins=100, label=f"{x[i].mean():.3g} +/- {x[i].std():.3g}") - ax[i].legend() - ax[i].set_title(s[i]) - plt.savefig("targets.jpg", dpi=200) - - -def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() - # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) - save_dir = Path(file).parent if file else Path(dir) - plot2 = False # plot additional results - if plot2: - ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() - - fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) - # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: - for f in sorted(save_dir.glob("study*.txt")): - y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T - x = np.arange(y.shape[1]) if x is None else np.array(x) - if plot2: - s = [ - "P", - "R", - "mAP@.5", - "mAP@.5:.95", - "t_preprocess (ms/img)", - "t_inference (ms/img)", - "t_NMS (ms/img)",] - for i in range(7): - ax[i].plot(x, y[i], ".-", linewidth=2, markersize=8) - ax[i].set_title(s[i]) - - j = y[3].argmax() + 1 - ax2.plot( - y[5, 1:j], - y[3, 1:j] * 1e2, - ".-", - linewidth=2, - markersize=8, - label=f.stem.replace("study_coco_", "").replace("yolo", "YOLO"), - ) - - ax2.plot( - 1e3 / np.array([209, 140, 97, 58, 35, 18]), - [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], - "k.-", - linewidth=2, - markersize=8, - alpha=0.25, - label="EfficientDet", - ) - - ax2.grid(alpha=0.2) - ax2.set_yticks(np.arange(20, 60, 5)) - ax2.set_xlim(0, 57) - ax2.set_ylim(25, 55) - ax2.set_xlabel("GPU Speed (ms/img)") - ax2.set_ylabel("COCO AP val") - ax2.legend(loc="lower right") - f = save_dir / "study.png" - print(f"Saving {f}...") - plt.savefig(f, dpi=300) - - -def plot_labels(labels, names=(), save_dir=Path("")): - # plot dataset labels - print("Plotting labels... 
") - c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes - nc = int(c.max() + 1) # number of classes - x = pd.DataFrame(b.transpose(), columns=["x", "y", "width", "height"]) - - # seaborn correlogram - sn.pairplot( - x, - corner=True, - diag_kind="auto", - kind="hist", - diag_kws=dict(bins=50), - plot_kws=dict(pmax=0.9), - ) - plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200) - plt.close() - - # matplotlib labels - matplotlib.use("svg") # faster - ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() - y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) - # [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # update colors bug #3195 - ax[0].set_ylabel("instances") - if 0 < len(names) < 30: - ax[0].set_xticks(range(len(names))) - ax[0].set_xticklabels(names, rotation=90, fontsize=10) - else: - ax[0].set_xlabel("classes") - sn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9) - sn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9) - - # rectangles - labels[:, 1:3] = 0.5 # center - labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 - img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) - for cls, *box in labels[:1000]: - ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot - ax[1].imshow(img) - ax[1].axis("off") - - for a in [0, 1, 2, 3]: - for s in ["top", "right", "left", "bottom"]: - ax[a].spines[s].set_visible(False) - - plt.savefig(save_dir / "labels.jpg", dpi=200) - matplotlib.use("Agg") - plt.close() - - -def profile_idetection(start=0, stop=0, labels=(), save_dir=""): - # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection() - ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() - s = [ - "Images", - "Free Storage (GB)", - "RAM Usage (GB)", - "Battery", - "dt_raw (ms)", - "dt_smooth (ms)", - "real-world FPS",] - files = list(Path(save_dir).glob("frames*.txt")) - for fi, f in enumerate(files): - try: - results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows - n = results.shape[1] # number of rows - x = np.arange(start, min(stop, n) if stop else n) - results = results[:, x] - t = results[0] - results[0].min() # set t0=0s - results[0] = x - for i, a in enumerate(ax): - if i < len(results): - label = labels[fi] if len(labels) else f.stem.replace("frames_", "") - a.plot( - t, - results[i], - marker=".", - label=label, - linewidth=1, - markersize=5, - ) - a.set_title(s[i]) - a.set_xlabel("time (s)") - # if fi == len(files) - 1: - # a.set_ylim(bottom=0) - for side in ["top", "right"]: - a.spines[side].set_visible(False) - else: - a.remove() - except Exception as e: - print(f"Warning: Plotting error for {f}; {e}") - ax[1].legend() - plt.savefig(Path(save_dir) / "idetection_profile.png", dpi=200) - - -def plot_evolve(evolve_csv="path/to/evolve.csv",): # from utils.plots import *; plot_evolve() - # Plot evolve.csv hyp evolution results - evolve_csv = Path(evolve_csv) - data = pd.read_csv(evolve_csv) - keys = [x.strip() for x in data.columns] - x = data.values - f = fitness(x) - j = np.argmax(f) # max fitness index - plt.figure(figsize=(10, 12), tight_layout=True) - matplotlib.rc("font", **{"size": 8}) - for i, k in enumerate(keys[7:]): - v = x[:, 7 + i] - mu = v[j] # best single result - plt.subplot(6, 5, i + 1) - plt.scatter(v, f, c=hist2d(v, f, 20), cmap="viridis", alpha=0.8, edgecolors="none") - plt.plot(mu, f.max(), "k+", markersize=15) - plt.title(f"{k} = {mu:.3g}", fontdict={"size": 9}) # 
limit to 40 characters - if i % 5 != 0: - plt.yticks([]) - print("%15s: %.3g" % (k, mu)) - f = evolve_csv.with_suffix(".png") # filename - plt.savefig(f, dpi=200) - plt.close() - print(f"Saved {f}") - - -def plot_results(file="path/to/results.csv", dir="", best=True): - # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') - save_dir = Path(file).parent if file else Path(dir) - fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) - ax = ax.ravel() - files = list(save_dir.glob("results*.csv")) - assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." - for _, f in enumerate(files): - try: - data = pd.read_csv(f) - index = np.argmax(0.9 * data.values[:, 7] + 0.1 * data.values[:, 6]) - s = [x.strip() for x in data.columns] - x = data.values[:, 0] - for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): - y = data.values[:, j] - # y[y == 0] = np.nan # don't show zero values - ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) - if best: - # best - ax[i].scatter( - index, - y[index], - color="r", - label=f"best:{index}", - marker="*", - linewidth=3, - ) - ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") - else: - # last - ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) - ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") - # if j in [8, 9, 10]: # share train and val loss y axes - # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print(f"Warning: Plotting error for {f}: {e}") - ax[1].legend() - fig.savefig(save_dir / "results.png", dpi=200) - plt.close() - - -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - import random - - # Plots one bounding box on image img - tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText( - img, - label, - (c1[0], c1[1] - 2), - 0, - tl / 3, - [225, 255, 255], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - -def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detect/exp")): - """ - x: Features to be visualized - module_type: Module type - stage: Module stage within model - n: Maximum number of feature maps to plot - save_dir: Directory to save results - """ - if "Detect" not in module_type: - batch, channels, height, width = x.shape # batch, channels, height, width - if height > 1 and width > 1: - f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename - - blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels - n = min(n, channels) # number of plots - fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols - ax = ax.ravel() - plt.subplots_adjust(wspace=0.05, hspace=0.05) - for i in range(n): - ax[i].imshow(blocks[i].squeeze()) # cmap='gray' - ax[i].axis("off") - - print(f"Saving {save_dir / f}... 
({n}/{channels})") - plt.savefig(save_dir / f, dpi=300, bbox_inches="tight") - plt.close() From 2298fcfd047c27084503cc649a71cbe17c3e7a22 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 20 Aug 2022 20:00:23 +0200 Subject: [PATCH 076/247] Fix check_font() --- utils/general.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/utils/general.py b/utils/general.py index 3f81e8733139..91566681c695 100644 --- a/utils/general.py +++ b/utils/general.py @@ -466,7 +466,6 @@ def check_file(file, suffix=''): return files[0] # return file -''' def check_font(font=FONT, progress=False): # Download font to CONFIG_DIR if necessary font = Path(font) @@ -475,20 +474,6 @@ def check_font(font=FONT, progress=False): url = "https://ultralytics.com/assets/" + font.name LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=progress) -''' - - -def check_font(font="Arial.ttf", size=10, progress=False): - # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary - font = Path(font) - font = font if font.exists() else (CONFIG_DIR / font.name) - try: - return ImageFont.truetype(str(font) if font.exists() else font.name, size) - except Exception as e: # download if missing - url = "https://ultralytics.com/assets/" + font.name - print(f"Downloading {url} to {font}...") - torch.hub.download_url_to_file(url, str(font), progress=progress) - return ImageFont.truetype(str(font), size) def check_dataset(data, autodownload=True): From c9b376da97addb653ee05dbc9af85e4e998c16c9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 20 Aug 2022 20:04:02 +0200 Subject: [PATCH 077/247] # torch.use_deterministic_algorithms(True) --- utils/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/general.py b/utils/general.py index 91566681c695..30ad949e06cf 100644 --- a/utils/general.py +++ b/utils/general.py @@ -242,7 +242,7 @@ def init_seeds(seed=0, deterministic=False): import torch.backends.cudnn as cudnn if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 - #torch.use_deterministic_algorithms(True) + # torch.use_deterministic_algorithms(True) os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' os.environ['PYTHONHASHSEED'] = str(seed) From ab00c7b5c375719fad49ea27f402228a60b2ebc4 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 20 Aug 2022 23:36:09 +0530 Subject: [PATCH 078/247] update doc detect->predict --- segment/predict.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 09efa844c6df..2bc2a5629d6f 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -3,7 +3,7 @@ Run inference on images, videos, directories, streams, etc. 
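# (A usage sketch with hypothetical paths, limited to flags this patch touches;
# see the parse_opt() change below for the new runs/predict_segment default.)
# $ python segment/predict.py --weights yolov5s-seg.pt --source data/images \
#     --project runs/predict_segment --name exp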
Usage - sources: - $ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam + $ python path/to/predict.py --weights yolov5s-seg.pt --source 0 # webcam img.jpg # image vid.mp4 # video path/ # directory @@ -12,7 +12,7 @@ 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream Usage - formats: - $ python path/to/detect.py --weights yolov5s.pt # PyTorch + $ python path/to/predict.py --weights yolov5s.pt # PyTorch yolov5s.torchscript # TorchScript yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn yolov5s.xml # OpenVINO @@ -256,7 +256,7 @@ def parse_opt(): parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') - parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') + parser.add_argument('--project', default=ROOT / 'runs/predict_segment', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') From 38d210ea7f7e63d8b934d95a00db5a3a3cd3535b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:17:00 +0200 Subject: [PATCH 079/247] Resolve precommit for segment/train and segment/val --- segment/train.py | 15 ++++++--------- segment/val.py | 3 +-- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/segment/train.py b/segment/train.py index cd48e909a9f3..fe91893ad08d 100644 --- a/segment/train.py +++ b/segment/train.py @@ -19,7 +19,6 @@ import sys import time from copy import deepcopy -from datetime import datetime from pathlib import Path import numpy as np @@ -27,9 +26,8 @@ import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F -import yaml from torch.nn.parallel import DistributedDataParallel as DDP -from torch.optim import SGD, Adam, AdamW, lr_scheduler +from torch.optim import SGD, Adam, lr_scheduler from tqdm import tqdm import val # for end-of-epoch mAP @@ -43,10 +41,9 @@ from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors -from utils.autobatch import check_train_batch_size from utils.downloads import attempt_download -from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, - check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, +from utils.general import (check_dataset, check_file, check_git_status, check_img_size, + check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import GenericLogger @@ -66,12 +63,12 @@ from utils.autobatch import check_train_batch_size from utils.general import LOGGER, check_amp, check_version -from utils.segment.metrics import BEST_KEYS, KEYS +from utils.segment.metrics import KEYS from utils.segment.plots import plot_images_and_masks, plot_results_with_masks def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio= \ + save_dir, 
epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio @@ -437,7 +434,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary 'ema': deepcopy(ema.ema).half(), 'updates': ema.updates, 'optimizer': optimizer.state_dict(), - #'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + # 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, 'date': datetime.now().isoformat()} # Save last, best and delete diff --git a/segment/val.py b/segment/val.py index 2cd0c36264ef..2632e6deacd4 100644 --- a/segment/val.py +++ b/segment/val.py @@ -37,7 +37,6 @@ import pycocotools.mask as mask_util import torch.nn.functional as F -from models.common import DetectMultiBackend from models.experimental import attempt_load # scoped to avoid circular import from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, @@ -262,7 +261,7 @@ def run( dt[0] += t2 - t1 # Inference - out, train_out = model(im) #if training else model(im, augment=augment, val=True) # inference, loss outputs + out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 # Loss From e56df79dba6d8fe031e5eac0a64f9510ce5b6ffb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 18:17:24 +0000 Subject: [PATCH 080/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/segment/train.py b/segment/train.py index fe91893ad08d..102de6239342 100644 --- a/segment/train.py +++ b/segment/train.py @@ -42,10 +42,10 @@ from models.yolo import Model from utils.autoanchor import check_anchors from utils.downloads import attempt_download -from utils.general import (check_dataset, check_file, check_git_status, check_img_size, - check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, - increment_path, init_seeds, intersect_dicts, labels_to_class_weights, - labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer) +from utils.general import (check_dataset, check_file, check_git_status, check_img_size, check_requirements, + check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, + intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, + print_mutation, strip_optimizer) from utils.loggers import GenericLogger from utils.plots import plot_evolve, plot_labels from utils.segment.dataloaders import create_dataloader From 28de97b49ef4a9ee667663ccbaeb66589374b027 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:19:29 +0200 Subject: [PATCH 081/247] Resolve precommit for utils/segment --- utils/segment/dataloaders.py | 2 +- utils/segment/general.py | 1 - utils/segment/loss.py | 4 ++-- utils/segment/plots.py | 7 +++---- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index f6fe642d077f..ced6f23bf151 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -11,11 +11,11 
@@ import torch from torch.utils.data import DataLoader, distributed +from .augmentations import mixup, random_perspective from ..augmentations import augment_hsv, copy_paste, letterbox from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn from ..torch_utils import torch_distributed_zero_first -from .augmentations import mixup, random_perspective def create_dataloader(path, diff --git a/utils/segment/general.py b/utils/segment/general.py index 675fac4fbd92..bed445312cde 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -1,7 +1,6 @@ import time import cv2 -import numpy as np import torch import torch.nn.functional as F import torchvision diff --git a/utils/segment/loss.py b/utils/segment/loss.py index d1027a387f7e..e0a1823d43f0 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -184,7 +184,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) lseg = lseg.mean(dim=(0, 1)) / w / h - return lseg.mean() #, iou# + lseg_iou.mean() + return lseg.mean() # , iou# + lseg_iou.mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 8974fdfe1274..2de7a54135d5 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -9,7 +9,7 @@ from PIL import Image from ..general import xywh2xyxy -from ..plots import Annotator, colors +from ..plots import colors def plot_masks(img, masks, colors, alpha=0.5): @@ -162,9 +162,8 @@ def plot_images_and_masks( if labels or conf[j] > 0.25: # 0.25 conf thresh label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - mosaic[block_y:block_y + h, block_x:block_x + - w, :][mask] = mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + ( - np.array(color) * 0.65) + mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = \ + mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) # Draw image filename labels if paths: From 1f7138733d90c27e8f4930f38400b31c6bc90d92 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 18:20:00 +0000 Subject: [PATCH 082/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/dataloaders.py | 2 +- utils/segment/loss.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index ced6f23bf151..f6fe642d077f 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -11,11 +11,11 @@ import torch from torch.utils.data import DataLoader, distributed -from .augmentations import mixup, random_perspective from ..augmentations import augment_hsv, copy_paste, letterbox from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn from 
..torch_utils import torch_distributed_zero_first +from .augmentations import mixup, random_perspective def create_dataloader(path, diff --git a/utils/segment/loss.py b/utils/segment/loss.py index e0a1823d43f0..bff4b25ca867 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From fd9ffb0009852289c431ca5e31e763b47b8eb191 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:22:54 +0200 Subject: [PATCH 083/247] Resolve precommit min_wh --- utils/segment/general.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index bed445312cde..a4999845a79d 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -34,7 +34,8 @@ def non_max_suppression_masks( assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" # Settings - min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() time_limit = 10.0 # seconds to quit after redundant = True # require redundant detections From 74eabbffc1a265c070be69f566872e0b96d012fc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:23:30 +0200 Subject: [PATCH 084/247] Resolve precommit utils/segment/plots --- utils/segment/plots.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 2de7a54135d5..17877505ac4d 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -76,14 +76,14 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, + images, + targets, + masks, + paths=None, + fname="images.jpg", + names=None, + max_size=640, + max_subplots=16, ): if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() @@ -163,7 +163,7 @@ def plot_images_and_masks( label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = \ - mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) + mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) # Draw image filename labels if paths: @@ -210,7 +210,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 791a905dc752828a981195a46c0db9fb65b1d03b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Aug 2022 18:23:55 +0000 Subject: [PATCH 085/247] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 17877505ac4d..dafe5f9eb31a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -76,14 +76,14 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None): def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, + images, + targets, + masks, + paths=None, + fname="images.jpg", + names=None, + max_size=640, + max_subplots=16, ): if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() @@ -210,7 +210,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From a752e671f1ee434e479ecdb88f02e5d755b7ee68 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:27:17 +0200 Subject: [PATCH 086/247] Resolve precommit utils/segment/general --- utils/segment/general.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index a4999845a79d..f1655e488944 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -5,7 +5,7 @@ import torch.nn.functional as F import torchvision -from ..general import xywh2xyxy +from ..general import LOGGER, xywh2xyxy from ..metrics import box_iou @@ -53,11 +53,11 @@ def non_max_suppression_masks( # Cat apriori labels if autolabelling if labels and len(labels[xi]): - l = labels[xi] - v = torch.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box + lb = labels[xi] + v = torch.zeros((len(lb), nc + 5), device=x.device) + v[:, :4] = lb[:, 1:5] # box v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls + v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls x = torch.cat((x, v), 0) # If none remain process next image @@ -101,7 +101,7 @@ def non_max_suppression_masks( i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS if i.shape[0] > max_det: # limit detections i = i[:max_det] - if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean) + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights @@ -111,7 +111,7 @@ def non_max_suppression_masks( output[xi] = x[i] if (time.time() - t) > time_limit: - print(f"WARNING: NMS time limit {time_limit}s exceeded") + LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') break # time limit exceeded return output From 1a84f47a6a802431cad1fb1bfc93d5c35929e4bc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 20 Aug 2022 20:37:00 +0200 Subject: [PATCH 087/247] Align NMS-seg closer to NMS --- utils/segment/general.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index f1655e488944..075dce192ddf 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -20,31 +20,32 @@ def non_max_suppression_masks( 
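# (A sketch of the batch-aware limits this hunk introduces; the expressions are
# taken from the diff below, with bs = 16 chosen only for illustration.)
# bs = prediction.shape[0]         # batch size, new in this patch
# time_limit = 0.6 + 0.06 * bs     # bs = 16 -> 1.56 s before NMS gives up
# output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * bs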
max_det=300, mask_dim=32, ): - """Runs Non-Maximum Suppression (NMS) on inference results + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] """ + bs = prediction.shape[0] # batch size nc = prediction.shape[2] - 5 # number of classes xc = prediction[..., 4] > conf_thres # candidates # Checks - assert (0 <= conf_thres <= 1), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert (0 <= iou_thres <= 1), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' # Settings # min_wh = 2 # (pixels) minimum box width and height max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 10.0 # seconds to quit after + time_limit = 0.6 + 0.06 * bs # seconds to quit after redundant = True # require redundant detections multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS nm = 5 + mask_dim t = time.time() - output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * prediction.shape[0] + output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -92,8 +93,6 @@ def non_max_suppression_masks( continue elif n > max_nms: # excess boxes x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - else: - x = x[x[:, 4].argsort(descending=True)] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes From 8b70e64fca7a3b7d9f77012dc0371a7af72f9c78 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 21 Aug 2022 00:34:22 +0200 Subject: [PATCH 088/247] restore deterministic init_seeds code --- segment/train.py | 2 +- utils/general.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/segment/train.py b/segment/train.py index 102de6239342..d1d7b8dc1686 100644 --- a/segment/train.py +++ b/segment/train.py @@ -99,7 +99,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary plots = not evolve and not opt.noplots # create plots overlap = opt.overlap_mask cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK, True) + init_seeds(opt.seed + 1 + RANK, deterministic=False) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] diff --git a/utils/general.py b/utils/general.py index 30ad949e06cf..35c2e52cb6b3 100644 --- a/utils/general.py +++ b/utils/general.py @@ -242,7 +242,7 @@ def init_seeds(seed=0, deterministic=False): import torch.backends.cudnn as cudnn if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 - # torch.use_deterministic_algorithms(True) + torch.use_deterministic_algorithms(True) os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' os.environ['PYTHONHASHSEED'] = str(seed) From 6e4fbebc7e3a97cc71027edd9de5b1d48623c6a8 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 10:45:42 +0530 Subject: [PATCH 089/247] remove easydict dependency --- requirements.txt | 
1 - utils/segment/metrics.py | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 241bbe62b6ac..10620566ca66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,6 @@ scipy>=1.4.1 torch>=1.7.0 torchvision>=0.8.1 tqdm>=4.64.0 -easydict>=1.9 protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012 # Logging ------------------------------------- diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index 981d90252ec9..b09ce23fb9e3 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -4,7 +4,6 @@ """ import numpy as np -from easydict import EasyDict as edict from ..metrics import ap_per_class @@ -48,7 +47,7 @@ def ap_per_class_box_and_mask( names=names, prefix="Mask")[2:] - results = edict({ + results = { "boxes": { "p": results_boxes[0], "r": results_boxes[1], @@ -60,7 +59,7 @@ def ap_per_class_box_and_mask( "r": results_masks[1], "ap": results_masks[3], "f1": results_masks[2], - "ap_class": results_masks[4]}}) + "ap_class": results_masks[4]}} return results From c6f3b6ed1a98f2a8f4aff6aa6d93a41d073a35cc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 10:47:47 +0530 Subject: [PATCH 090/247] update --- segment/train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/train.py b/segment/train.py index d1d7b8dc1686..8323bcdc137d 100644 --- a/segment/train.py +++ b/segment/train.py @@ -390,7 +390,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if ni == 10: files = sorted(save_dir.glob('train*.jpg')) - logger.log_images(files, "Mosaics") + logger.log_images(files, "Mosaics", epoch) # end batch ------------------------------------------------------------------------------------------------ # Scheduler @@ -424,7 +424,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary logger.log_metrics(metrics_dict, epoch) if plots: files = sorted(save_dir.glob('val*.jpg')) - logger.log_images(files, "Validation") + logger.log_images(files, "Validation", epoch) # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { @@ -491,7 +491,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files, "Results") + logger.log_images(files, "Results", epoch+1) torch.cuda.empty_cache() return results From d1327d2b76d521b9d65caa2c8cdfca06eff836e8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Aug 2022 05:18:14 +0000 Subject: [PATCH 091/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/train.py b/segment/train.py index 8323bcdc137d..bc0792d5267c 100644 --- a/segment/train.py +++ b/segment/train.py @@ -491,7 +491,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - logger.log_images(files, "Results", epoch+1) + 
logger.log_images(files, "Results", epoch + 1) torch.cuda.empty_cache() return results From 466ab71f56d879ddda53c0262e81577fd4fb06ff Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 13:39:34 +0530 Subject: [PATCH 092/247] restore output_to_target mask --- segment/val.py | 4 ++-- utils/segment/plots.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/segment/val.py b/segment/val.py index 2632e6deacd4..76f6af4f8859 100644 --- a/segment/val.py +++ b/segment/val.py @@ -42,11 +42,11 @@ coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou -from utils.plots import output_to_target, plot_val_study +from utils.plots import plot_val_study from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask -from utils.segment.plots import plot_images_and_masks +from utils.segment.plots import plot_images_and_masks, output_to_target from utils.torch_utils import de_parallel, select_device, time_sync diff --git a/utils/segment/plots.py b/utils/segment/plots.py index dafe5f9eb31a..641988e1030d 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -239,3 +239,12 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[1].legend() fig.savefig(save_dir / "results.png", dpi=200) plt.close() + +def output_to_target(output, filter_dets=10): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + targets = [] + for i, o in enumerate(output): + o = o[:filter_dets] + for *box, conf, cls in o.cpu().numpy()[:, :6]: + targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + return np.array(targets) From 7724c710be9e42e9138ba29982cfb2c607bc1ae0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Aug 2022 08:10:01 +0000 Subject: [PATCH 093/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 2 +- utils/segment/plots.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 76f6af4f8859..c17f41458603 100644 --- a/segment/val.py +++ b/segment/val.py @@ -46,7 +46,7 @@ from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask -from utils.segment.plots import plot_images_and_masks, output_to_target +from utils.segment.plots import output_to_target, plot_images_and_masks from utils.torch_utils import de_parallel, select_device, time_sync diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 641988e1030d..c810d2182d80 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -240,6 +240,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): fig.savefig(save_dir / "results.png", dpi=200) plt.close() + def output_to_target(output, filter_dets=10): # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] targets = [] From 08735aa91f7b69b0e5ec65d44721b23b8e0661eb Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 13:53:32 +0530 Subject: [PATCH 094/247] update --- 
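A worked example (values illustrative) of the rows that the output_to_target()
helper restored above emits for plotting:

# one detection [x1, y1, x2, y2, conf, cls] = [10, 10, 50, 30, 0.9, 2] in image 0
# xyxy2xywh gives center (30, 20) and size (40, 20), so the target row becomes
# [batch_id, class_id, x, y, w, h, conf] = [0, 2.0, 30.0, 20.0, 40.0, 20.0, 0.9]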
utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 641988e1030d..431880de84b5 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -8,7 +8,7 @@ import torch from PIL import Image -from ..general import xywh2xyxy +from ..general import xywh2xyxy, xyxy2xywh from ..plots import colors From 75f617f81a68255577081a5560a0511ce661b90e Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 21 Aug 2022 23:13:02 +0530 Subject: [PATCH 095/247] cleanup --- utils/loggers/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 6a2734b26782..97df1371ec6f 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -15,7 +15,6 @@ from utils.loggers.clearml.clearml_utils import ClearmlLogger from utils.loggers.wandb.wandb_utils import WandbLogger from utils.plots import plot_images, plot_labels, plot_results -from utils.segment.plots import plot_images_and_masks, plot_results_with_masks from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb', 'clearml') # *.csv, TensorBoard, Weights & Biases, ClearML From ba62c62f2a363960f4da07e7dcd9bddeacde109d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 21 Aug 2022 23:48:03 +0200 Subject: [PATCH 096/247] Remove unused ImageFont import --- utils/general.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/general.py b/utils/general.py index 47fdcf6d9b3c..3bc6fbc22d57 100644 --- a/utils/general.py +++ b/utils/general.py @@ -33,7 +33,6 @@ import torch import torchvision import yaml -from PIL import ImageFont from utils.downloads import gsutil_getsize from utils.metrics import box_iou, fitness From 55ef06a660f4707c5d9a7916acfa50154c731a7b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 00:28:57 +0200 Subject: [PATCH 097/247] Unified NMS --- segment/predict.py | 6 +- segment/val.py | 20 +++---- utils/general.py | 24 ++++---- utils/segment/general.py | 122 +-------------------------------------- 4 files changed, 30 insertions(+), 142 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 2bc2a5629d6f..1adb02348da3 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -41,9 +41,9 @@ from models.experimental import attempt_load from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, - increment_path, print_args, scale_coords, strip_optimizer, xyxy2xywh) + increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import non_max_suppression_masks, process_mask_upsample, scale_masks +from utils.segment.general import process_mask_upsample, scale_masks from utils.segment.plots import plot_masks from utils.torch_utils import select_device, time_sync @@ -130,7 +130,7 @@ def run( dt[1] += t3 - t2 # NMS - pred = non_max_suppression_masks(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) dt[2] += time_sync() - t3 # Second-stage classifier (optional) diff --git a/segment/val.py b/segment/val.py index c17f41458603..fdc318d3930b 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,12 +39,12 @@ from models.experimental import attempt_load # scoped to avoid 
circular import from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, print_args, scale_coords, - xywh2xyxy, xyxy2xywh) + coco80_to_coco91_class, colorstr, emojis, increment_path, non_max_suppression, print_args, + scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou from utils.plots import plot_val_study from utils.segment.dataloaders import create_dataloader -from utils.segment.general import mask_iou, non_max_suppression_masks, process_mask, process_mask_upsample, scale_masks +from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import output_to_target, plot_images_and_masks from utils.torch_utils import de_parallel, select_device, time_sync @@ -272,13 +272,13 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() - out = non_max_suppression_masks(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - mask_dim=de_parallel(model).model[-1].mask_dim) + out = non_max_suppression(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + masks=de_parallel(model).model[-1].mask_dim) dt[2] += time_sync() - t3 # keep pred masks for plotting diff --git a/utils/general.py b/utils/general.py index 3bc6fbc22d57..e00f69309fcf 100644 --- a/utils/general.py +++ b/utils/general.py @@ -823,8 +823,10 @@ def non_max_suppression(prediction, agnostic=False, multi_label=False, labels=(), - max_det=300): - """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes + max_det=300, + masks=0, + ): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] @@ -842,13 +844,14 @@ def non_max_suppression(prediction, # min_wh = 2 # (pixels) minimum box width and height max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 0.3 + 0.03 * bs # seconds to quit after + time_limit = 0.5 + 0.05 * bs # seconds to quit after redundant = True # require redundant detections multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * bs + si = 5 + masks # box/mask start index + output = [torch.zeros((0, 6 + masks), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -870,16 +873,17 @@ def non_max_suppression(prediction, # Compute conf x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) + # Box/Mask + box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) + mask = x[:, 5:si] # zero columns if no masks # Detections matrix nx6 (xyxy, conf, cls) if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + i, j = (x[:, si:] > conf_thres).nonzero(as_tuple=False).T + 
x = torch.cat((box[i], x[i, j + si, None], j[:, None].float(), mask[i]), 1) else: # best class only - conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + conf, j = x[:, si:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: diff --git a/utils/segment/general.py b/utils/segment/general.py index 075dce192ddf..c1ca23c344fa 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -1,119 +1,6 @@ -import time - import cv2 import torch import torch.nn.functional as F -import torchvision - -from ..general import LOGGER, xywh2xyxy -from ..metrics import box_iou - - -def non_max_suppression_masks( - prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300, - mask_dim=32, -): - """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections - - Returns: - list of detections, on (n,6) tensor per image [xyxy, conf, cls] - """ - - bs = prediction.shape[0] # batch size - nc = prediction.shape[2] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Checks - assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' - assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' - - # Settings - # min_wh = 2 # (pixels) minimum box width and height - max_wh = 7680 # (pixels) maximum box width and height - max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() - time_limit = 0.6 + 0.06 * bs # seconds to quit after - redundant = True # require redundant detections - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - merge = False # use merge-NMS - nm = 5 + mask_dim - - t = time.time() - output = [torch.zeros((0, 6 + mask_dim), device=prediction.device)] * bs - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - pred_masks = x[:, 5:nm] - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - lb = labels[xi] - v = torch.zeros((len(lb), nc + 5), device=x.device) - v[:, :4] = lb[:, 1:5] # box - v[:, 4] = 1.0 # conf - v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls - x = torch.cat((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, nm:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + nm, None], j[:, None].float(), pred_masks[i]), 1) - else: # best class only - conf, j = x[:, nm:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float(), pred_masks), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Apply finite constraint - # if not torch.isfinite(x).all(): - # x = x[torch.isfinite(x).all(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else 
max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores - i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) - # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) - iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix - weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes - if redundant: - i = i[iou.sum(1) > 1] # require redundancy - - output[xi] = x[i] - if (time.time() - t) > time_limit: - LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded') - break # time limit exceeded - - return output def crop(masks, boxes): @@ -127,10 +14,7 @@ def crop(masks, boxes): """ h, w, n = masks.size() x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = ( - boxes[:, 1], - boxes[:, 3], - ) + y1, y2 = boxes[:, 1], boxes[:, 3] rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) @@ -150,7 +34,7 @@ def crop(masks, boxes): def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ - Crop after unsample. + Crop after upsample. proto_out: [mask_dim, mask_h, mask_w] out_masks: [n, mask_dim], n is number of masks after nms bboxes: [n, 4], n is number of masks after nms @@ -171,7 +55,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): """ - Crop before unsample. + Crop before upsample. proto_out: [mask_dim, mask_h, mask_w] out_masks: [n, mask_dim], n is number of masks after nms bboxes: [n, 4], n is number of masks after nms From 7ce737835b401949b0ecc3c3b29c78ab629adebf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:29:25 +0000 Subject: [PATCH 098/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/general.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/utils/general.py b/utils/general.py index e00f69309fcf..565b53b6496d 100644 --- a/utils/general.py +++ b/utils/general.py @@ -816,16 +816,17 @@ def clip_coords(boxes, shape): boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 -def non_max_suppression(prediction, - conf_thres=0.25, - iou_thres=0.45, - classes=None, - agnostic=False, - multi_label=False, - labels=(), - max_det=300, - masks=0, - ): +def non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + masks=0, +): """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections Returns: From 7ebd19d1e1148d58e9cc284118355f05720b5e86 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 01:47:39 +0200 Subject: [PATCH 099/247] DetectMultiBackend compatibility --- models/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/common.py b/models/common.py index d308244c4a44..5d49da77a35e 100644 --- a/models/common.py +++ b/models/common.py @@ -333,6 +333,7 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else 
model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() + segmentation_model = type(model.model[-1]).__name__ == 'DetectSegment' elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata @@ -466,7 +467,7 @@ def forward(self, im, augment=False, visualize=False, val=False): if self.pt: # PyTorch y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im) - if isinstance(y, tuple): + if isinstance(y, tuple) and not self.segmentation_model: y = y[0] elif self.jit: # TorchScript y = self.model(im)[0] From 261bec1ee6018f536f632332fb4462278ffb6dcc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 01:48:06 +0200 Subject: [PATCH 100/247] segment/predict.py update --- segment/predict.py | 135 +++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 71 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 1adb02348da3..1b6eb9d35c95 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -1,31 +1,32 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ -Run inference on images, videos, directories, streams, etc. +Run YOLOv5 segmentation inference on images, videos, directories, streams, etc. Usage - sources: - $ python path/to/predict.py --weights yolov5s-seg.pt --source 0 # webcam - img.jpg # image - vid.mp4 # video - path/ # directory - path/*.jpg # glob - 'https://youtu.be/Zgi9g1ksQHc' # YouTube - 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream + $ python segment/predict.py --weights yolov5s-seg.pt --source 0 # webcam + img.jpg # image + vid.mp4 # video + path/ # directory + 'path/*.jpg' # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream Usage - formats: - $ python path/to/predict.py --weights yolov5s.pt # PyTorch - yolov5s.torchscript # TorchScript - yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn - yolov5s.xml # OpenVINO - yolov5s.engine # TensorRT - yolov5s.mlmodel # CoreML (macOS-only) - yolov5s_saved_model # TensorFlow SavedModel - yolov5s.pb # TensorFlow GraphDef - yolov5s.tflite # TensorFlow Lite - yolov5s_edgetpu.tflite # TensorFlow Edge TPU + $ python segment/predict.py --weights yolov5s-seg.pt # PyTorch + yolov5s-seg.torchscript # TorchScript + yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s-seg.xml # OpenVINO + yolov5s-seg.engine # TensorRT + yolov5s-seg.mlmodel # CoreML (macOS-only) + yolov5s-seg_saved_model # TensorFlow SavedModel + yolov5s-seg.pb # TensorFlow GraphDef + yolov5s-seg.tflite # TensorFlow Lite + yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU """ import argparse import os +import platform import sys from pathlib import Path @@ -38,20 +39,21 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.experimental import attempt_load +from models.common import DetectMultiBackend from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams -from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, +from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box from utils.segment.general import process_mask_upsample, scale_masks from utils.segment.plots 
import plot_masks -from utils.torch_utils import select_device, time_sync +from utils.torch_utils import select_device, smart_inference_mode -@torch.no_grad() +@smart_inference_mode() def run( - weights=ROOT / 'yolov5s.pt', # model.pt path(s) + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path imgsz=(640, 640), # inference size (height, width) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold @@ -67,13 +69,14 @@ def run( augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models - project=ROOT / 'runs/predict_segment', # save results to project/name + project=ROOT / 'runs/predict-seg', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -89,11 +92,8 @@ def run( # Load model device = select_device(device) - model = attempt_load(weights, device=device, inplace=True, fuse=True) - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, 'module') else model.names # get class names - model.half() if half else model.float() - pt = True + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, names, pt = model.stride, model.names, model.pt imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader @@ -108,30 +108,25 @@ def run( vid_path, vid_writer = [None] * bs, [None] * bs # Run inference - if str(device) != "cpu": - im = torch.zeros(1, 3, *imgsz).to(device).half() # input image - model(im) # warmup - seen, windows, dt = 0, [], [0.0, 0.0, 0.0] + model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup + seen, windows, dt = 0, [], (Profile(), Profile(), Profile()) for path, im, im0s, vid_cap, s in dataset: - t1 = time_sync() - im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - if len(im.shape) == 3: - im = im[None] # expand for batch dim - t2 = time_sync() - dt[0] += t2 - t1 + with dt[0]: + im = torch.from_numpy(im).to(device) + im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim # Inference - visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred, out = model(im, augment=augment, visualize=visualize) - proto = out[1] - t3 = time_sync() - dt[1] += t3 - t2 + with dt[1]: + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred, out = model(im, augment=augment, visualize=visualize) + proto = out[1] # NMS - pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) - dt[2] += time_sync() - t3 + with dt[2]: + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) # Second-stage classifier (optional) # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) @@ -153,14 +148,13 @@ def run( imc = im0.copy() if save_crop else im0 # 
for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - # mask stuff + # Mask additions --------------------------------------------------------------------------------------- masks_conf = det[:, 6:] - # binary mask, (img_h, img_w, n) - masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) - # n, img_h, img_w - masks = masks.permute(2, 0, 1).contiguous() - # bbox stuff - det = det[:, :6] # update the value in outputs, remove mask part. + masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) # binary_mask(imh,imw,n) + masks = masks.permute(2, 0, 1).contiguous() # shape(n,imh,imw) + det = det[:, :6] # remove masks + # Mask additions --------------------------------------------------------------------------------------- + # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() @@ -169,19 +163,16 @@ def run( n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - # plot masks + # Mask plotting ---------------------------------------------------------------------------------------- mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] - # NOTE: this way to draw masks is faster, - # but the image might get blurred, - # from https://github.com/dbolya/yolact - # image with masks, (img_h, img_w, 3) - img_masks = plot_masks(im[i], masks, mcolors) - # scale image to original hw - img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) + # NOTE: this plot method is faster, but the image might get blurred https://github.com/dbolya/yolact + img_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) + img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) # scale to original h, w annotator.im = img_masks + # Mask plotting ---------------------------------------------------------------------------------------- # Write results - for j, (*xyxy, conf, cls) in enumerate(det): + for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format @@ -191,14 +182,14 @@ def run( if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - annotator.box_label(xyxy, label, color=colors(j, True)) + annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Stream results im0 = annotator.result() if view_img: - if p not in windows: + if platform.system() == 'Linux' and p not in windows: windows.append(p) cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) @@ -225,22 +216,23 @@ def run( vid_writer[i].write(im0) # Print time (inference-only) - LOGGER.info(f'{s}Done. 
({t3 - t2:.3f}s)') + LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") # Print results - t = tuple(x / seen * 1E3 for x in dt) # speeds per image + t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: - strip_optimizer(weights) # update model (to fix SourceChangeWarning) + strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning) def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') @@ -256,13 +248,14 @@ def parse_opt(): parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') - parser.add_argument('--project', default=ROOT / 'runs/predict_segment', help='save results to project/name') + parser.add_argument('--project', default=ROOT / 'runs/predict-seg', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) From 0095547f6211a9bd7d4f40c829dc70b60dc0c8ca Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 22 Aug 2022 11:59:17 +0800 Subject: [PATCH 101/247] update plot colors --- segment/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/predict.py b/segment/predict.py index 1b6eb9d35c95..c24869a8866c 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -164,7 +164,7 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting ---------------------------------------------------------------------------------------- - mcolors = [colors(int(cls)) for cls in range(len(det[:, 5]))] + mcolors = [colors(int(cls), True) for cls in det[:, 5]] # NOTE: this plot method is faster, but the image might get 
blurred https://github.com/dbolya/yolact
                    img_masks = plot_masks(im[i], masks, mcolors)  # image with masks shape(imh,imw,3)
                    img_masks = scale_masks(im.shape[2:], img_masks, im0.shape)  # scale to original h, w

From 139640cf84f312118febcab5864377f49e058bab Mon Sep 17 00:00:00 2001
From: Laughing-q <1185102784@qq.com>
Date: Mon, 22 Aug 2022 11:59:40 +0800
Subject: [PATCH 102/247] fix bbox shifted

---
 models/yolo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/yolo.py b/models/yolo.py
index 15ef5023acf3..e6c2143e4b31 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -135,7 +135,7 @@ def forward(self, x):
                     y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    xy = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i]  # xy
                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                     y = torch.cat((xy.type_as(y), wh.type_as(y), y[..., 4:]), -1)
                 z.append(y.view(-1, self.na * ny * nx, self.no))

From cabb99d61a0765440991cdeea6c9098932f7e345 Mon Sep 17 00:00:00 2001
From: Laughing-q <1185102784@qq.com>
Date: Mon, 22 Aug 2022 11:59:59 +0800
Subject: [PATCH 103/247] sort bbox by confidence

---
 utils/general.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/utils/general.py b/utils/general.py
index 565b53b6496d..1468c8c4d21f 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -900,6 +900,8 @@ def non_max_suppression(
             continue
         elif n > max_nms:  # excess boxes
             x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+        else:
+            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence

         # Batched NMS
         c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes

From 63daead45242ae5b69ce0f7807389d4a0f420b05 Mon Sep 17 00:00:00 2001
From: Ayush Chaurasia
Date: Mon, 22 Aug 2022 11:33:55 +0530
Subject: [PATCH 104/247] enable overlap by default

---
 segment/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/segment/train.py b/segment/train.py
index bc0792d5267c..55b9c53a7ef8 100644
--- a/segment/train.py
+++ b/segment/train.py
@@ -97,7 +97,7 @@ def train(hyp, opt, device):  # hyp is path/to/hyp.yaml or hyp dictionary
     # Config
     plots = not evolve and not opt.noplots  # create plots
-    overlap = opt.overlap_mask
+    overlap = not opt.no_overlap
     cuda = device.type != 'cpu'
     init_seeds(opt.seed + 1 + RANK, deterministic=False)
     with torch_distributed_zero_first(LOCAL_RANK):
@@ -536,7 +536,7 @@ def parse_opt(known=False):

     # Instance Segmentation Args
     parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to save memory')
-    parser.add_argument('--overlap-mask',
+    parser.add_argument('--no-overlap',
                         action='store_true',
                         help='Overlapping masks train faster at the cost of slight accuracy decrease')

From 28ff5fe06b1a3b497ac389f7113451ac51eae143 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Mon, 22 Aug 2022 23:36:25 +0200
Subject: [PATCH 105/247] Merge detect/segment output_to_target() function

---
 segment/val.py         |  6 +++---
 utils/plots.py         | 10 ++++++----
 utils/segment/plots.py | 10 ----------
 3 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index fdc318d3930b..7438426dfb88 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -42,11 +42,11 @@
                            coco80_to_coco91_class, colorstr, emojis, increment_path, non_max_suppression, print_args,
                            scale_coords, xywh2xyxy,
xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou -from utils.plots import plot_val_study +from utils.plots import output_to_target, plot_val_study from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask -from utils.segment.plots import output_to_target, plot_images_and_masks +from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, time_sync @@ -345,7 +345,7 @@ def run( plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels plot_masks = torch.cat(plot_masks, dim=0) - plot_images_and_masks(im, output_to_target(out, filter_dets=15), plot_masks, paths, + plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred # Compute metrics diff --git a/utils/plots.py b/utils/plots.py index d35e2bdd168a..cbdbd7da1428 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -177,12 +177,14 @@ def butter_lowpass(cutoff, fs, order): return filtfilt(b, a, data) # forward-backward filter -def output_to_target(output): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] +def output_to_target(output, max_det=300): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting targets = [] for i, o in enumerate(output): - targets.extend([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf] for *box, conf, cls in o.cpu().numpy()) - return np.array(targets) + box, conf, cls = o[:max_det].cpu().split((4, 1, 1), 1) + j = torch.full((conf.shape[0], 1), i) + targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1)) + return torch.cat(targets, 0).numpy() @threaded diff --git a/utils/segment/plots.py b/utils/segment/plots.py index c1afa60786c6..da87245ab885 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -239,13 +239,3 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[1].legend() fig.savefig(save_dir / "results.png", dpi=200) plt.close() - - -def output_to_target(output, filter_dets=10): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] - targets = [] - for i, o in enumerate(output): - o = o[:filter_dets] - for *box, conf, cls in o.cpu().numpy()[:, :6]: - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) - return np.array(targets) From 8c0eb6d276b339ae75eb8e6c8c11b135b8f8e864 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Mon, 22 Aug 2022 23:49:31 +0200 Subject: [PATCH 106/247] Start segmentation CI --- .github/workflows/ci-testing.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 4ef930c61233..bd3a31bf2379 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -123,6 +123,29 @@ jobs: model = torch.hub.load('.', 'custom', path=path, source='local') print(model('data/images/bus.jpg')) EOF + - name: Test segmentation + shell: bash # for Windows compatibility + run: | + m=${{ matrix.model }}-seg # official weights + b=runs/train-seg/exp/weights/best # best.pt checkpoint + python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train +# python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 
--device cpu # train
+# for d in cpu; do # devices
+# for w in $m $b; do # weights
+# python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val
+# python segment/predict.py --imgsz 64 --weights $w.pt --device $d # detect
+# done
+# done
+# # python hubconf.py --model $m # hub
+# # python models/tf.py --weights $m.pt # build TF model
+# python models/yolo.py --cfg $m.yaml # build PyTorch model
+# python export.py --weights $m.pt --img 64 --include torchscript # export
+# python - <

Date: Tue, 23 Aug 2022 22:09:53 +0530
Subject: [PATCH 107/247] fix plots

---
 utils/plots.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/plots.py b/utils/plots.py
index cbdbd7da1428..4628ca632a46 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -181,7 +181,7 @@ def output_to_target(output, max_det=300):
     # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
     targets = []
     for i, o in enumerate(output):
-        box, conf, cls = o[:max_det].cpu().split((4, 1, 1), 1)
+        box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
         j = torch.full((conf.shape[0], 1), i)
         targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1))
     return torch.cat(targets, 0).numpy()

From 17a979c520e672ef395a1de9aa1db689fe04b3c1 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 23 Aug 2022 18:55:34 +0200
Subject: [PATCH 108/247] Update ci-testing.yml

---
 .github/workflows/ci-testing.yml | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index bd3a31bf2379..5074dd4093f3 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -128,7 +128,14 @@ jobs:
       run: |
         m=${{ matrix.model }}-seg # official weights
         b=runs/train-seg/exp/weights/best # best.pt checkpoint
-        python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train
+
+        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
+        m=yolov5s-seg # official weights
+        python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
+        python export.py --weights $m.pt --img 64 --include torchscript # export
+        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
+
+# python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train
 # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train

From 8b8ea38c184c04e3bea0a449f8358bbf769f7595 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Tue, 23 Aug 2022 19:50:06 +0200
Subject: [PATCH 109/247] fix training whitespace

---
 .github/workflows/ci-testing.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index 5074dd4093f3..0edd05dbf86c 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -128,13 +128,13 @@ jobs:
       run: |
         m=${{ matrix.model }}-seg # official weights
         b=runs/train-seg/exp/weights/best # best.pt checkpoint
-
+
         # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
         m=yolov5s-seg # official weights
         python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
         python export.py --weights $m.pt --img 64 --include torchscript # export
         # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
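Note: the next patch condenses the two mask-decoding helpers in utils/segment/general.py.
Both implement the same YOLACT-style pipeline and differ only in whether cropping happens
before or after upsampling: prototype masks are combined linearly with per-detection
coefficients, passed through a sigmoid, cropped to each box, and thresholded at 0.5. A rough
PyTorch sketch of the shared core under assumed shapes — proto (c, mh, mw), coefficients
(n, c), boxes in pixel xyxy — with a simplified inline crop; this is a sketch, not the exact
repository code:

    import torch
    import torch.nn.functional as F

    def decode_masks(proto, coeffs, boxes, out_shape):
        c, mh, mw = proto.shape
        # linear combination of prototypes, then sigmoid -> (n, mh, mw)
        masks = (coeffs.tanh() @ proto.view(c, -1)).sigmoid().view(-1, mh, mw)
        # upsample to the network input size
        masks = F.interpolate(masks[None], out_shape, mode='bilinear', align_corners=False)[0]
        # zero out everything outside each detection's box
        h, w = out_shape
        xs = torch.arange(w, device=proto.device).view(1, 1, -1)
        ys = torch.arange(h, device=proto.device).view(1, -1, 1)
        x1, y1, x2, y2 = (boxes[:, i].view(-1, 1, 1) for i in range(4))
        inside = (xs >= x1) & (xs < x2) & (ys >= y1) & (ys < y2)
        return (masks * inside) > 0.5  # binary (n, h, w)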
From b2c6d0917e6dbb6232b62f4bc43b848b1504243d Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Tue, 23 Aug 2022 21:49:15 +0200
Subject: [PATCH 110/247] optimize process mask functions (can we merge both?)

---
 utils/segment/general.py | 41 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/utils/segment/general.py b/utils/segment/general.py
index c1ca23c344fa..fe4898b2cdd4 100644
--- a/utils/segment/general.py
+++ b/utils/segment/general.py
@@ -42,15 +42,12 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape):
     return: h, w, n
     """
-    # mask_h, mask_w, n
-    masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T
-    masks = masks.sigmoid()
-    masks = masks.permute(2, 0, 1).contiguous()
-    # [n, mask_h, mask_w]
-    masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
-    # [mask_h, mask_w, n]
-    masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes)
-    return masks.gt_(0.5)  # .gt_(0.2)
+
+    c, mh, mw = proto_out.shape  # CHW
+    masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw)
+    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
+    masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes)  # HWC
+    return masks.gt_(0.5)


 def process_mask(proto_out, out_masks, bboxes, shape, upsample=False):
@@ -63,23 +60,21 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False):
     return: h, w, n
     """
-    downsampled_bboxes = bboxes.clone()
-    mh, mw = proto_out.shape[1:]
+
+    c, mh, mw = proto_out.shape  # CHW
     ih, iw = shape
-    # mask_h, mask_w, n
-    masks = proto_out.float().permute(1, 2, 0).contiguous() @ out_masks.float().tanh().T
-    # print(masks)
-    masks = masks.sigmoid()
-    # print('after sigmoid:', masks)
-    downsampled_bboxes[:, 0] = downsampled_bboxes[:, 0] / iw * mw
-    downsampled_bboxes[:, 2] = downsampled_bboxes[:, 2] / iw * mw
-    downsampled_bboxes[:, 1] = downsampled_bboxes[:, 1] / ih * mh
-    downsampled_bboxes[:, 3] = downsampled_bboxes[:, 3] / ih * mh
-    masks = crop(masks, downsampled_bboxes)
+    masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW
+
+    downsampled_bboxes = bboxes.clone()
+    downsampled_bboxes[:, 0] *= mw / iw
+    downsampled_bboxes[:, 2] *= mw / iw
+    downsampled_bboxes[:, 3] *= mh / ih
+    downsampled_bboxes[:, 1] *= mh / ih
+    masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes)  # HWC
+
     masks = masks.permute(2, 0, 1).contiguous()
-    # [n, mask_h, mask_w]
     if upsample:
-        masks = F.interpolate(masks.unsqueeze(0), shape, mode='bilinear', align_corners=False).squeeze(0)
+        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
     return masks.gt_(0.5).permute(1, 2, 0).contiguous()

From d189aabe3d43ee2b4410134bd2b659e202f929fe Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Tue, 23 Aug 2022 22:15:06 +0200
Subject: [PATCH 111/247] Update predict/detect

---
 detect.py          |  4 ++--
 segment/predict.py | 18 +++++++-----------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/detect.py b/detect.py
index 60a821b59a03..3af6baa0edc2 100644
--- a/detect.py
+++ b/detect.py
@@ -149,8
+149,8 @@ def run( det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class + for c in det[:, 5].unique(): + n = (det[:, 5] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results diff --git a/segment/predict.py b/segment/predict.py index c24869a8866c..b29f3d2dfd8a 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -149,30 +149,26 @@ def run( annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Mask additions --------------------------------------------------------------------------------------- - masks_conf = det[:, 6:] - masks = process_mask_upsample(proto[i], masks_conf, det[:, :4], im.shape[2:]) # binary_mask(imh,imw,n) - masks = masks.permute(2, 0, 1).contiguous() # shape(n,imh,imw) - det = det[:, :6] # remove masks + masks = process_mask_upsample(proto[i], det[:, 6:], det[:, :4], im.shape[2:]) # HWC + masks = masks.permute(2, 0, 1).contiguous() # CHW # Mask additions --------------------------------------------------------------------------------------- # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class + for c in det[:, 5].unique(): + n = (det[:, 5] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting ---------------------------------------------------------------------------------------- mcolors = [colors(int(cls), True) for cls in det[:, 5]] - # NOTE: this plot method is faster, but the image might get blurred https://github.com/dbolya/yolact - img_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) - img_masks = scale_masks(im.shape[2:], img_masks, im0.shape) # scale to original h, w - annotator.im = img_masks + im_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) + annotator.im = scale_masks(im.shape[2:], im_masks, im0.shape) # scale to original h, w # Mask plotting ---------------------------------------------------------------------------------------- # Write results - for *xyxy, conf, cls in reversed(det): + for *xyxy, conf, cls in reversed(det[:, :6]): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format From 4cf5775abee316a5cc22612a8c2c1dabeec59315 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 22:50:15 +0200 Subject: [PATCH 112/247] Update plot_images --- utils/plots.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index 4628ca632a46..e7dbe92ced1a 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -188,17 +188,20 @@ def output_to_target(output, max_det=300): @threaded -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16): +def plot_images(images, targets, paths=None, fname='images.jpg', names=None): # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() - if np.max(images[0]) <= 1: - images *= 255 # de-normalise (optional) + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, 
i.e. 4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init From 00a7117413901348a9ed8157532578992a4e6ff8 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:00:10 +0200 Subject: [PATCH 113/247] Update plot_images_and_masks --- utils/plots.py | 5 ++ utils/segment/plots.py | 182 ++++++++++++++--------------------------- 2 files changed, 68 insertions(+), 119 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index e7dbe92ced1a..3c1fc92d167a 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -121,6 +121,11 @@ def text(self, xy, text, txt_color=(255, 255, 255)): w, h = self.font.getsize(text) # text width, height self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + def fromarray(self, im): + # Update self.im from a numpy array + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + def result(self): # Return annotated image as array return np.asarray(self.im) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index da87245ab885..b7b2c0fca0de 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -6,10 +6,9 @@ import numpy as np import pandas as pd import torch -from PIL import Image -from ..general import xywh2xyxy, xyxy2xywh -from ..plots import colors +from ..general import threaded, xywh2xyxy +from ..plots import Annotator, colors def plot_masks(img, masks, colors, alpha=0.5): @@ -50,152 +49,97 @@ def plot_masks(img, masks, colors, alpha=0.5): return (img_gpu * 255).byte().cpu().numpy() -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - import random - - # Plots one bounding box on image img - tl = (line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1) # line/font thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText( - img, - label, - (c1[0], c1[1] - 2), - 0, - tl / 3, - [225, 255, 255], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - -def plot_images_and_masks( - images, - targets, - masks, - paths=None, - fname="images.jpg", - names=None, - max_size=640, - max_subplots=16, -): +@threaded +def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): + # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() if isinstance(masks, torch.Tensor): - masks = masks.cpu().numpy() - masks = masks.astype(int) + masks = masks.cpu().numpy().astype(int) - # un-normalise - if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 
4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - + # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): + for i, im in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y:block_y + h, block_x:block_x + w, :] = img + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(targets) > 0: - idx = (targets[:, 0]).astype(int) - image_targets = targets[idx == i] + j = targets[:, 0] == i + ti = targets[j] # image targets if masks.max() > 1.0: # mean that masks are overlap image_masks = masks[[i]] # (1, 640, 640) # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(image_targets) + nl = len(ti) index = np.arange(nl).reshape(nl, 1, 1) + 1 image_masks = np.repeat(image_masks, nl, axis=0) image_masks = np.where(image_masks == index, 1.0, 0.0) else: - image_masks = masks[idx == i] + image_masks = masks[j] - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype("int") - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = (None if labels else image_targets[:, 6]) # check for confidence presence (label vs pred) + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 boxes[[0, 2]] *= w # scale to pixels boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] color = colors(cls) cls = names[cls] if names else cls - if scale_factor < 1: - mask = image_masks[j].astype(np.uint8) - mask = cv2.resize(mask, (w, h)) - mask = mask.astype(np.bool) - else: - mask = image_masks[j].astype(np.bool) if labels or conf[j] > 0.25: # 0.25 conf thresh - label = "%s" % cls if labels else f"{cls} {conf[j]:.1f}" - plot_one_box(box, mosaic, label=label, color=color, 
line_thickness=tl) - mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] = \ - mosaic[block_y:block_y + h, block_x:block_x + w, :][mask] * 0.35 + (np.array(color) * 0.65) - - # Draw image filename labels - if paths: - label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText( - mosaic, - label, - (block_x + 5, block_y + t_size[1] + 5), - 0, - tl / 3, - [220, 220, 220], - thickness=tf, - lineType=cv2.LINE_AA, - ) - - # Image border - cv2.rectangle( - mosaic, - (block_x, block_y), - (block_x + w, block_y + h), - (255, 255, 255), - thickness=3, - ) - - if fname: - r = min(1280.0 / max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - with Image.fromarray(mosaic) as im: - im.save(fname) - return mosaic + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + + # Plot masks + im = np.asarray(annotator.im) + for j, box in enumerate(boxes.T.tolist()): + if conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + if scale < 1: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask = mask.astype(np.bool) + else: + mask = image_masks[j].astype(np.bool) + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) + annotator.im.save(fname) # save def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): @@ -210,7 +154,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 1c94b4d48ec2e0978f42d0ab9aeaf4b40212715c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Aug 2022 21:01:12 +0000 Subject: [PATCH 114/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index b7b2c0fca0de..5ca1ba707a3d 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -154,7 +154,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 75cbbbb577a178acb0748397b730fb655f3637b0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:18:39 +0200 Subject: [PATCH 115/247] fix --- utils/segment/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index b7b2c0fca0de..b98fbb770a51 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -127,9 +127,9 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' annotator.box_label(box, label, 
color=color) # Plot masks - im = np.asarray(annotator.im) + im = np.asarray(annotator.im).copy() for j, box in enumerate(boxes.T.tolist()): - if conf[j] > 0.25: # 0.25 conf thresh + if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) if scale < 1: mask = image_masks[j].astype(np.uint8) From 56be6c44de5256d69825b7b79bd92c5debe4b67d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:40:40 +0200 Subject: [PATCH 116/247] Add train to CI --- .github/workflows/ci-testing.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 0edd05dbf86c..da09b9017dd3 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,11 +128,18 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint + + d='../datasets' # unzip directory + f='coco128.zip' # or 'coco128-segments.zip', 68 MB + rm -rf $d + curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f # Temporary tests untill yolov5n-seg.pt and COCO128-seg is ready --------------------------------------------- m=yolov5s-seg # official weights python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect python export.py --weights $m.pt --img 64 --include torchscript # export + python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg yolov5n_seg.yaml --epochs 1 --device cpu # train + # Temporary tests untill yolov5n-seg.pt and COCO128-seg is ready --------------------------------------------- # python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train From f820f329aa5133ddc627eca310f747c4d12bef4d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Tue, 23 Aug 2022 23:42:59 +0200 Subject: [PATCH 117/247] fix precommit --- .github/workflows/ci-testing.yml | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index da09b9017dd3..ba341f11843e 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,7 +128,7 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint - + d='../datasets' # unzip directory f='coco128.zip' # or 'coco128-segments.zip', 68 MB rm -rf $d @@ -142,24 +142,24 @@ jobs: # Temporary tests untill yolov5n-seg.pt and COCO128-seg is ready --------------------------------------------- -# python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train -# python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train -# for d in cpu; do # devices -# for w in $m $b; do # weights -# python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val -# python segment/predict.py --imgsz 64 --weights $w.pt --device $d # detect -# done -# done -# # python hubconf.py --model $m # hub -# # python models/tf.py --weights $m.pt # build TF model -# python models/yolo.py --cfg $m.yaml # build PyTorch model -# python export.py --weights $m.pt --img 64 --include torchscript # export -# python - < Date: Tue, 23 Aug 2022 23:44:04 +0200 Subject: [PATCH 118/247] fix precommit CI --- .github/workflows/ci-testing.yml | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/.github/workflows/ci-testing.yml 
b/.github/workflows/ci-testing.yml
index ba341f11843e..2f8b65ae70dd 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -130,36 +130,15 @@ jobs:
         b=runs/train-seg/exp/weights/best # best.pt checkpoint

         d='../datasets' # unzip directory
-        f='coco128.zip' # or 'coco128-segments.zip', 68 MB
+        f='coco128-segments.zip'
         rm -rf $d
         curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f
-        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
         m=yolov5s-seg # official weights
         python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
         python export.py --weights $m.pt --img 64 --include torchscript # export
         python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg yolov5n_seg.yaml --epochs 1 --device cpu # train
-        # Temporary tests until yolov5n-seg.pt and COCO128-seg is ready ---------------------------------------------
-
-        # python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train
-        # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train
-        # for d in cpu; do # devices
-        # for w in $m $b; do # weights
-        # python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val
-        # python segment/predict.py --imgsz 64 --weights $w.pt --device $d # detect
-        # done
-        # done
-        # # python hubconf.py --model $m # hub
-        # # python models/tf.py --weights $m.pt # build TF model
-        # python models/yolo.py --cfg $m.yaml # build PyTorch model
-        # python export.py --weights $m.pt --img 64 --include torchscript # export
-        # python - <

Date: Tue, 23 Aug 2022 23:53:40 +0200
Subject: [PATCH 119/247] fix precommit pycocotools

---
 .github/workflows/ci-testing.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index 2f8b65ae70dd..eb05e72e95fd 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -134,6 +134,7 @@ jobs:
         rm -rf $d
         curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f
+        pip install pycocotools
         m=yolov5s-seg # official weights
         python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect
         python export.py --weights $m.pt --img 64 --include torchscript # export

From 492c89148f5c752113f19577a5c901c579396e21 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Wed, 24 Aug 2022 00:09:35 +0200
Subject: [PATCH 120/247] fix val float issues

---
 segment/val.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index 7438426dfb88..273c22b01bc9 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -299,10 +299,9 @@ def run(
            # deal with masks
            midx = [si] if overlap else targets[:, 0] == si
-           gt_masks = masks[midx]
+           gt_masks = masks[midx].float()
            proto_out = train_out[1][si]
-           pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0,
-                                                                                                     1).contiguous()
+           pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float()

            if plots and batch_i < 3:
                # filter top 15 to plot
                plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu())
@@ -447,9 +447,9 @@ def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)')
-    parser.add_argument('--batch-size', type=int, default=32, help='batch size')
-    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model.pt path(s)')
+    parser.add_argument('--batch-size', type=int, default=8, help='batch size')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
    parser.add_argument('--task', default='val', help='train, val, test, speed or study')
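Note: the .float() casts introduced above matter because the ground-truth masks arrive as
integer tensors and the predicted masks as booleans, while the mask IoU used by the metrics
is computed with a matrix multiplication, which needs floating-point inputs. A small sketch
of that computation under the assumption of flattened binary masks — illustrative only, not
the exact repository mask_iou:

    import torch

    def mask_iou_sketch(m1, m2, eps=1e-7):
        # m1: (n, h*w), m2: (k, h*w), float tensors of 0s and 1s
        inter = m1 @ m2.T                                   # (n, k) overlapping pixel counts
        union = m1.sum(1)[:, None] + m2.sum(1)[None] - inter
        return inter / (union + eps)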
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') - parser.add_argument('--batch-size', type=int, default=32, help='batch size') - parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model.pt path(s)') + parser.add_argument('--batch-size', type=int, default=8, help='batch size') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') parser.add_argument('--task', default='val', help='train, val, test, speed or study') From a86311444444d16f70d528645cd5915d75c0bd17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Aug 2022 22:10:07 +0000 Subject: [PATCH 121/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 273c22b01bc9..9b16c30a0bd4 100644 --- a/segment/val.py +++ b/segment/val.py @@ -301,7 +301,8 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx].float() proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() if plots and batch_i < 3: # filter top 15 to plot plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) From ba46f44c3b498182b7b5db6132dd614bd849dd2e Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 00:21:46 +0200 Subject: [PATCH 122/247] fix masks float float issues --- segment/val.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/segment/val.py b/segment/val.py index 9b16c30a0bd4..e3d514df1023 100644 --- a/segment/val.py +++ b/segment/val.py @@ -253,7 +253,8 @@ def run( if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) - masks = masks.to(device).float() + masks = masks.to(device) + masks = masks.float() im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width @@ -299,7 +300,7 @@ def run( # deal with masks midx = [si] if overlap else targets[:, 0] == si - gt_masks = masks[midx].float() + gt_masks = masks[midx] proto_out = train_out[1][si] pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() From bdb79e4bf34c434e8603e97a4f1964d796c48cc1 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 00:47:22 +0200 Subject: [PATCH 123/247] suppress errors --- utils/segment/plots.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 9367ad80b404..724ef7b6e7f8 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -1,3 +1,4 @@ +import contextlib import math from pathlib import Path @@ -137,7 +138,8 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' mask = 
mask.astype(np.bool) else: mask = image_masks[j].astype(np.bool) - im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 annotator.fromarray(im) annotator.im.save(fname) # save @@ -149,12 +151,12 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax = ax.ravel() files = list(save_dir.glob("results*.csv")) assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." - for _, f in enumerate(files): + for f in files: try: data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): @@ -163,14 +165,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) if best: # best - ax[i].scatter( - index, - y[index], - color="r", - label=f"best:{index}", - marker="*", - linewidth=3, - ) + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") else: # last From e66c15cecff07dc5c71f27e3dc3565b28d6765b9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Aug 2022 22:48:01 +0000 Subject: [PATCH 124/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 724ef7b6e7f8..986b334f6606 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -156,7 +156,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From d6979baa5197c75b739e089095458f3921a4aaa9 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 02:00:25 +0200 Subject: [PATCH 125/247] fix no-predictions plotting bug --- segment/val.py | 6 +++--- utils/segment/plots.py | 48 ++++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/segment/val.py b/segment/val.py index e3d514df1023..96c8354428dd 100644 --- a/segment/val.py +++ b/segment/val.py @@ -343,9 +343,9 @@ def run( mode="bilinear", align_corners=False, ).squeeze(0) - plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', - names) # labels - plot_masks = torch.cat(plot_masks, dim=0) + plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) + if any(plot_masks): + plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 986b334f6606..280e6e6fe05a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py 
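# Editor's sketch (not part of the patch): the "no-predictions plotting bug" fixed in
# PATCH 125 is torch.cat() raising a RuntimeError when a batch yields no detections and
# plot_masks stays an empty list; the later switch from any() to len() (PATCH 128) also
# avoids calling bool() on a multi-element tensor, which PyTorch rejects as ambiguous.
#
#   import torch
#   plot_masks = []                  # nothing collected for this batch
#   if len(plot_masks):              # torch.cat([]) would raise RuntimeError
#       plot_masks = torch.cat(plot_masks, dim=0)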
@@ -96,15 +96,16 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' j = targets[:, 0] == i ti = targets[j] # image targets - if masks.max() > 1.0: # means that the masks overlap - image_masks = masks[[i]] # (1, 640, 640) - # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(ti) - index = np.arange(nl).reshape(nl, 1, 1) + 1 - image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) - else: - image_masks = masks[j] + if any(masks): + if masks.max() > 1.0: # means that the masks overlap + image_masks = masks[[i]] # (1, 640, 640) + # convert masks (1, 640, 640) -> (n, 640, 640) + nl = len(ti) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[j] boxes = xywh2xyxy(ti[:, 2:6]).T classes = ti[:, 1].astype('int') @@ -128,19 +129,20 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' annotator.box_label(box, label, color=color) # Plot masks - im = np.asarray(annotator.im).copy() - for j, box in enumerate(boxes.T.tolist()): - if labels or conf[j] > 0.25: # 0.25 conf thresh - color = colors(classes[j]) - if scale < 1: - mask = image_masks[j].astype(np.uint8) - mask = cv2.resize(mask, (w, h)) - mask = mask.astype(np.bool) - else: - mask = image_masks[j].astype(np.bool) - with contextlib.suppress(Exception): - im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 - annotator.fromarray(im) + if any(masks): + im = np.asarray(annotator.im).copy() + for j, box in enumerate(boxes.T.tolist()): + if labels or conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + if scale < 1: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask = mask.astype(np.bool) + else: + mask = image_masks[j].astype(np.bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) annotator.im.save(fname) # save @@ -156,7 +158,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11], ) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From 034b1a63c6a592a9c6df6e7ea0791a341ae4f3c2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 00:02:12 +0000 Subject: [PATCH 126/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 280e6e6fe05a..ef7940f2dad1 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -158,7 +158,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11], ) + 0.1 * data.values[:, 11],) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From
5035ebd3974751ed1537e994a50b6a9c20b52d68 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 02:38:54 +0200 Subject: [PATCH 127/247] Add CSV Logger --- utils/loggers/__init__.py | 14 +++++++++++--- utils/segment/plots.py | 22 ++++++++++------------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index a59dbd31c073..42673d211f8d 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -245,6 +245,7 @@ def __init__(self, opt, console_logger, include=('tb', 'wandb')): self.save_dir = Path(opt.save_dir) self.include = include self.console_logger = console_logger + self.csv = self.save_dir / 'results.csv' # CSV logger if 'tb' in self.include: prefix = colorstr('TensorBoard: ') self.console_logger.info( @@ -258,14 +259,21 @@ def __init__(self, opt, console_logger, include=('tb', 'wandb')): else: self.wandb = None - def log_metrics(self, metrics_dict, epoch): + def log_metrics(self, metrics, epoch): # Log metrics dictionary to all loggers + if self.csv: + keys, vals = list(metrics.keys()), list(metrics.values()) + n = len(metrics) + 1 # number of cols + s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header + with open(self.csv, 'a') as f: + f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') + if self.tb: - for k, v in metrics_dict.items(): + for k, v in metrics.items(): self.tb.add_scalar(k, v, epoch) if self.wandb: - self.wandb.log(metrics_dict, step=epoch) + self.wandb.log(metrics, step=epoch) def log_images(self, files, name='Images', epoch=0): # Log images to all loggers diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 280e6e6fe05a..e9ee819c2fe7 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -96,17 +96,6 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' j = targets[:, 0] == i ti = targets[j] # image targets - if any(masks): - if masks.max() > 1.0: # means that the masks overlap - image_masks = masks[[i]] # (1, 640, 640) - # convert masks (1, 640, 640) -> (n, 640, 640) - nl = len(ti) - index = np.arange(nl).reshape(nl, 1, 1) + 1 - image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) - else: - image_masks = masks[j] - boxes = xywh2xyxy(ti[:, 2:6]).T classes = ti[:, 1].astype('int') labels = ti.shape[1] == 6 # labels if no conf column @@ -129,7 +118,16 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' annotator.box_label(box, label, color=color) # Plot masks - if any(masks): + if len(masks): + if masks.max() > 1.0: # means that the masks overlap + image_masks = masks[[i]] # (1, 640, 640) + nl = len(ti) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[j] + im = np.asarray(annotator.im).copy() for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh From be5a244be9a3ce470ccdc3468e26b96876684e0b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 02:40:24 +0200 Subject: [PATCH 128/247] fix val len(plot_masks) --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 96c8354428dd..12f7c9fc3476 100644 --- a/segment/val.py +++ b/segment/val.py @@ -344,7 +344,7 @@ def run( align_corners=False, ).squeeze(0) plot_images_and_masks(im,
targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) - if any(plot_masks): + if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred From 4fad59cfb45253a651dd536bd468fd09ff09ffa9 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 24 Aug 2022 10:52:53 +0800 Subject: [PATCH 129/247] speed up evaluation --- segment/val.py | 19 ++++++++----------- utils/segment/plots.py | 3 ++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/segment/val.py b/segment/val.py index 12f7c9fc3476..d014131b7ddd 100644 --- a/segment/val.py +++ b/segment/val.py @@ -122,6 +122,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): mode="bilinear", align_corners=False, ).squeeze(0) + gt_masks = gt_masks.gt_(0.5) iou = mask_iou( gt_masks.view(gt_masks.shape[0], -1), @@ -171,7 +172,7 @@ def run( mask_downsample_ratio=1, compute_loss=None, ): - process = process_mask_upsample if plots else process_mask + process = process_mask_upsample if save_json else process_mask # Initialize/load model and set device training = model is not None if training: # called by train.py @@ -304,9 +305,6 @@ def run( proto_out = train_out[1][si] pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() - if plots and batch_i < 3: - # filter top 15 to plot - plot_masks.append(torch.as_tensor(pred_masks[:15], dtype=torch.uint8).cpu()) # Predictions if single_cls: @@ -326,6 +324,12 @@ def run( stats.append( (correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) + # convert pred_masks to uint8 + pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) + if plots and batch_i < 3: + # filter top 15 to plot + plot_masks.append(pred_masks[:15].cpu()) + # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) @@ -336,13 +340,6 @@ def run( # Plot images if plots and batch_i < 3: - if masks.shape[1:] != im.shape[2:]: - masks = F.interpolate( - masks.unsqueeze(0).float(), - im.shape[2:], - mode="bilinear", - align_corners=False, - ).squeeze(0) plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 11b7081f4995..6303103ed084 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -132,7 +132,8 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) - if scale < 1: + mh, mw = image_masks[j].shape[:2] + if mh != h or mw != w: mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) mask = mask.astype(np.bool) From ce6d849cf536e11172e95a45102c40c48612bf16 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 24 Aug 2022 11:26:31 +0800 Subject: [PATCH 130/247] fix process_mask --- utils/segment/general.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index fe4898b2cdd4..80286e3fd94b 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -44,7 +44,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): """ c, mh, 
mw = proto_out.shape # CHW - masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw) + masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) # HWC return masks.gt_(0.5) @@ -63,7 +63,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): c, mh, mw = proto_out.shape # CHW ih, iw = shape - masks = (out_masks.tanh() @ proto_out.view(c, -1)).sigmoid().view(-1, mh, mw) # CHW + masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW downsampled_bboxes = bboxes.clone() downsampled_bboxes[:, 0] *= mw / iw From 61212a6a22aab6965e28dae25a5ec841965031eb Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Wed, 24 Aug 2022 11:26:40 +0800 Subject: [PATCH 131/247] fix plots --- utils/segment/plots.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 6303103ed084..4517ff455cba 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -93,8 +93,8 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' if paths: annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(targets) > 0: - j = targets[:, 0] == i - ti = targets[j] # image targets + idx = targets[:, 0] == i + ti = targets[idx] # image targets boxes = xywh2xyxy(ti[:, 2:6]).T classes = ti[:, 1].astype('int') @@ -126,13 +126,13 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' image_masks = np.repeat(image_masks, nl, axis=0) image_masks = np.where(image_masks == index, 1.0, 0.0) else: - image_masks = masks[j] + image_masks = masks[idx] im = np.asarray(annotator.im).copy() for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) - mh, mw = image_masks[j].shape[:2] + mh, mw = image_masks[j].shape if mh != h or mw != w: mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) From c00c632da2afcc310c6f7861238d43a1c0524923 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 12:54:59 +0200 Subject: [PATCH 132/247] update segment/utils build_targets --- utils/segment/loss.py | 65 ++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index bff4b25ca867..c8bdc6a36dac 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -39,6 +39,7 @@ def __init__(self, model, autobalance=False, overlap=False): self.overlap = overlap device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters + self.device = device # Define criteria BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) @@ -190,58 +191,49 @@ def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], 
[] - gain = torch.ones(8, device=targets.device) # normalized to gridspace gain - ai = (torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, - nt)) # same as .repeat_interleave(nt) + gain = torch.ones(8, device=self.device) # normalized to gridspace gain + ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) if self.overlap: batch = p[0].shape[0] ti = [] for i in range(batch): - # find number of targets of each image - num = (targets[:, 0] == i).sum() - # (na, num) - ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) - # (na, nt) - ti = torch.cat(ti, 1) + num = (targets[:, 0] == i).sum() # find number of targets of each image + ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) + ti = torch.cat(ti, 1) # (na, nt) else: - ti = (torch.arange(nt, device=targets.device).float().view(1, - nt).repeat(na, - 1)) # same as .repeat_interleave(nt) - - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None], ti[:, :, None]), 2) # append anchor indices + ti = torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) + targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices g = 0.5 # bias - off = ( - torch.tensor( - [ - [0, 0], - [1, 0], - [0, 1], - [-1, 0], - [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], - device=targets.device, - ).float() * g) # offsets + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=self.device).float() * g # offsets for i in range(self.nl): anchors, shape = self.anchors[i], p[i].shape gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors - t = targets * gain + t = targets * gain # shape(3,n,7) if nt: # Matches - r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1.0 / r).max(2)[0] < self.hyp["anchor_t"] # compare + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter # Offsets gxy = t[:, 2:4] # grid xy gxi = gain[[2, 3]] - gxy # inverse - j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T - l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T + j, k = ((gxy % 1 < g) & (gxy > 1)).T + l, m = ((gxi % 1 < g) & (gxi > 1)).T j = torch.stack((torch.ones_like(j), j, k, l, m)) t = t.repeat((5, 1, 1))[j] offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] @@ -250,15 +242,12 @@ def build_targets(self, p, targets): offsets = 0 # Define - b, c = t[:, :2].long().T # image, class - gxy = t[:, 2:4] # grid xy - gwh = t[:, 4:6] # grid wh + bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors + (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class gij = (gxy - offsets).long() - gi, gj = gij.T # grid xy indices + gi, gj = gij.T # grid indices # Append - a = t[:, 6].long() # anchor indices - tidx = t[:, 7].long() indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors From bd277b76d1104e7bea962c38c17016c34a4b0706 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 10:57:43 +0000 Subject: 
[PATCH 133/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index c8bdc6a36dac..94f64dfcc3a7 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From c37820f4959273551107bf8d32997820e47a9c19 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 14:02:47 +0200 Subject: [PATCH 134/247] optimize utils/segment/general crop() --- utils/segment/general.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 80286e3fd94b..e9a5c904fdc1 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -12,24 +12,20 @@ def crop(masks, boxes): - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form """ - h, w, n = masks.size() - x1, x2 = boxes[:, 0], boxes[:, 2] - y1, y2 = boxes[:, 1], boxes[:, 3] + h, w, n = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - rows = (torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n)) - cols = (torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n)) + rows = torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n) # shape(h,w,n) + cols = torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n) # shape(h,w,n) # (1, w, 1), (1, 1, n) -> (1, w, n) - masks_left = rows >= x1.view(1, 1, -1) - masks_right = rows < x2.view(1, 1, -1) + masks_left = rows >= x1 # shape(h,w,n) + masks_right = rows < x2 # shape(h,w,n) # (h, 1, 1), (1, 1, n) -> (h, 1, n) - masks_up = cols >= y1.view(1, 1, -1) - masks_down = cols < y2.view(1, 1, -1) + masks_up = cols >= y1 # shape(h,w,n) + masks_down = cols < y2 # shape(h,w,n) - # (h, w, n) - crop_mask = masks_left * masks_right * masks_up * masks_down - - return masks * crop_mask.float() + return masks * (masks_left * masks_right * masks_up * masks_down).float() def process_mask_upsample(proto_out, out_masks, bboxes, shape): From bf9e19af3c56a2a7af73d69a7c58ca37e1490ced Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 15:30:17 +0200 Subject: [PATCH 135/247] optimize utils/segment/general crop() 2 --- utils/segment/general.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index e9a5c904fdc1..cc8cc2997541 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -12,20 +12,12 @@ def crop(masks, boxes): - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form """ + h, w, n = masks.shape x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - - rows = torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n) # shape(h,w,n) - cols = torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n) # shape(h,w,n) - - # (1, w, 1), (1, 1, n) -> (1, w, n) - masks_left = 
rows >= x1 # shape(h,w,n) - masks_right = rows < x2 # shape(h,w,n) - # (h, 1, 1), (1, 1, n) -> (h, 1, n) - masks_up = cols >= y1 # shape(h,w,n) - masks_down = cols < y2 # shape(h,w,n) - - return masks * (masks_left * masks_right * masks_up * masks_down).float() + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)).float() def process_mask_upsample(proto_out, out_masks, bboxes, shape): From d276b6791189589bdb0552165511ba5aba20220e Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 16:00:15 +0200 Subject: [PATCH 136/247] minor updates --- segment/val.py | 2 +- utils/segment/loss.py | 15 ++++++--------- utils/segment/plots.py | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/segment/val.py b/segment/val.py index d014131b7ddd..7f8361a53c24 100644 --- a/segment/val.py +++ b/segment/val.py @@ -113,7 +113,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): nl = len(labels) index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 gt_masks = gt_masks.repeat(nl, 1, 1) - gt_masks = torch.where(gt_masks == index, 1.0, 0.0) + gt_masks = torch.where(gt_masks == index) if gt_masks.shape[1:] != pred_masks.shape[1:]: gt_masks = F.interpolate( diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 94f64dfcc3a7..5aa617ae2065 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -122,9 +122,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask Regression # TODO: # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None, :], (mask_h, mask_w), + downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", - align_corners=False).squeeze(0) + align_corners=False)[0] mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T @@ -138,10 +138,8 @@ def __call__(self, preds, targets, masks): # predictions, targets, model index = b == bi if self.overlap: mask_index = tidxs[i][index] - # h, w, n - mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) - # h, w, n - mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) + mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) + mask_gti = torch.where(mask_gti == mask_index) # shape(h,w,n) else: mask_gti = downsampled_masks[tidxs[i]][index] mask_gti = mask_gti.permute(1, 2, 0).contiguous() @@ -151,8 +149,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model psi = ps[index][:, 5:self.nm] proto = proto_out[bi] - one_lseg = self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) - batch_lseg += one_lseg + batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) # # update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 4517ff455cba..901d55888f44 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -124,7 +124,7 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' nl = len(ti) 
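# Editor's sketch (illustrative values only, not part of the patch): the overlap
# encoding decoded here packs all instances of one image into a single (h,w) map whose
# pixel values are instance ids 1..n, with 0 as background; comparing it against an
# (n,1,1) index array broadcasts it back out to n binary masks.
#
#   import numpy as np
#   overlap = np.array([[0, 1, 1], [2, 2, 0]])         # one (h,w) map, ids 1..2
#   idx = np.arange(2).reshape(2, 1, 1) + 1            # shape (n,1,1)
#   binary = np.where(overlap[None] == idx, 1.0, 0.0)  # shape (n,h,w)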
index = np.arange(nl).reshape(nl, 1, 1) + 1 image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index, 1.0, 0.0) + image_masks = np.where(image_masks == index) else: image_masks = masks[idx] From 854f728c1cc96ebae2237ce345ec256f84f8309e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 14:00:40 +0000 Subject: [PATCH 137/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 5aa617ae2065..bd9501205038 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -122,8 +122,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask Regression # TODO: # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), - mode="bilinear", + downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] mxywh = xywh[i] From f1a533d207894c71804bd090ee8bb41c1bf28326 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 16:47:29 +0200 Subject: [PATCH 138/247] torch.where revert --- segment/val.py | 2 +- utils/segment/loss.py | 2 +- utils/segment/plots.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index 7f8361a53c24..d014131b7ddd 100644 --- a/segment/val.py +++ b/segment/val.py @@ -113,7 +113,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): nl = len(labels) index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1 gt_masks = gt_masks.repeat(nl, 1, 1) - gt_masks = torch.where(gt_masks == index) + gt_masks = torch.where(gt_masks == index, 1.0, 0.0) if gt_masks.shape[1:] != pred_masks.shape[1:]: gt_masks = F.interpolate( diff --git a/utils/segment/loss.py b/utils/segment/loss.py index bd9501205038..13c62ab274eb 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -138,7 +138,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model if self.overlap: mask_index = tidxs[i][index] mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) - mask_gti = torch.where(mask_gti == mask_index) # shape(h,w,n) + mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) else: mask_gti = downsampled_masks[tidxs[i]][index] mask_gti = mask_gti.permute(1, 2, 0).contiguous() diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 901d55888f44..4517ff455cba 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -124,7 +124,7 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' nl = len(ti) index = np.arange(nl).reshape(nl, 1, 1) + 1 image_masks = np.repeat(image_masks, nl, axis=0) - image_masks = np.where(image_masks == index) + image_masks = np.where(image_masks == index, 1.0, 0.0) else: image_masks = masks[idx] From e0e256d7886edddb9b2723204dd7a160db660d35 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 
17:03:19 +0200 Subject: [PATCH 139/247] downsample only if different shape --- utils/segment/loss.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 13c62ab274eb..f757fde30c30 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel -from .general import crop, masks_iou class MaskIOULoss(nn.Module): @@ -120,10 +120,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lcls += self.BCEcls(ps[:, self.nm:], t) # BCE # Mask Regression - # TODO: - # [bs * num_objs, img_h, img_w] -> [bs * num_objs, mask_h, mask_w] - downsampled_masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", - align_corners=False)[0] + if tuple(masks.shape[-2:]) != (mask_h, mask_w): + # downsample shape(bs * num_objs,img_h,img_w) -> (bs * num_objs,mask_h,mask_w) + masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] mxywh = xywh[i] mws, mhs = mxywh[:, 2:].T @@ -137,10 +136,10 @@ def __call__(self, preds, targets, masks): # predictions, targets, model index = b == bi if self.overlap: mask_index = tidxs[i][index] - mask_gti = downsampled_masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) + mask_gti = masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) else: - mask_gti = downsampled_masks[tidxs[i]][index] + mask_gti = masks[tidxs[i]][index] mask_gti = mask_gti.permute(1, 2, 0).contiguous() mw, mh = mws[index], mhs[index] From da11068ea9dade8958574ca3b85b16037c07ce54 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 15:03:52 +0000 Subject: [PATCH 140/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f757fde30c30..4469cb7658a7 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import is_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From 95a999434ec200fa7a3bc41e83a38d517a873a2f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 17:30:18 +0200 Subject: [PATCH 141/247] loss cleanup --- utils/segment/loss.py | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f757fde30c30..473e2f65dad5 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -45,8 +45,6 @@ def __init__(self, model, autobalance=False, overlap=False): BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["obj_pw"]], device=device)) - self.mask_loss = MaskIOULoss() - # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # 
positive, negative BCE targets @@ -58,13 +56,8 @@ def __init__(self, model, autobalance=False, overlap=False): det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = ( - BCEcls, - BCEobj, - 1.0, - h, - autobalance, - ) + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance, + self.mask_loss = MaskIOULoss() for k in "na", "nc", "nl", "anchors", "nm": if hasattr(det, k): setattr(self, k, getattr(det, k)) @@ -76,13 +69,12 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mask_h, mask_w = proto_out.shape[2:] proto_out = proto_out.permute(0, 2, 3, 1) - device = targets.device - lcls, lbox, lobj, lseg = ( - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - torch.zeros(1, device=device), - ) + device = self.device + lcls = torch.zeros(1, device=device) + lbox = torch.zeros(1, device=device) + lobj = torch.zeros(1, device=device) + lseg = torch.zeros(1, device=device) + tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -101,17 +93,13 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lbox += (1.0 - iou).mean() # iou loss # Objectness - score_iou = iou.detach().clamp(0).type(tobj.dtype) + iou = iou.detach().clamp(0).type(tobj.dtype) if self.sort_obj_iou: - sort_id = torch.argsort(score_iou) - b, a, gj, gi, score_iou = ( - b[sort_id], - a[sort_id], - gj[sort_id], - gi[sort_id], - score_iou[sort_id], - ) - tobj[b, a, gj, gi] = 1.0 * ((1.0 - self.gr) + self.gr * score_iou) # iou ratio + j = iou.argsort() + b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, a, gj, gi] = iou # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) From 868385f915e240f0c4306b2b527cd1bcabab6ada Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 19:00:03 +0200 Subject: [PATCH 142/247] loss cleanup 2 --- utils/segment/loss.py | 60 +++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 5c37f5d19ca2..7d2880712d29 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F +from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou -from ..torch_utils import is_parallel -from .general import crop, masks_iou +from ..torch_utils import de_parallel class MaskIOULoss(nn.Module): @@ -42,52 +42,52 @@ def __init__(self, model, autobalance=False, overlap=False): self.device = device # Define criteria - BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device)) - BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["obj_pw"]], device=device)) + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = 
smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets # Focal loss - g = h["fl_gamma"] # focal loss gamma + g = h['fl_gamma'] # focal loss gamma if g > 0: BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) - det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module - self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 - self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance, + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance self.mask_loss = MaskIOULoss() - for k in "na", "nc", "nl", "anchors", "nm": - if hasattr(det, k): - setattr(self, k, getattr(det, k)) + self.na = m.na # number of anchors + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.nm = m.nm # number of masks + self.anchors = m.anchors + self.device = device def __call__(self, preds, targets, masks): # predictions, targets, model - p = preds[0] - # [batch-size, mask_dim, mask_height, mask_width] - proto_out = preds[1] + p, proto_out = preds # proto_out shape(bs, masks, mask_h, mask_w) mask_h, mask_w = proto_out.shape[2:] proto_out = proto_out.permute(0, 2, 3, 1) - device = self.device - lcls = torch.zeros(1, device=device) - lbox = torch.zeros(1, device=device) - lobj = torch.zeros(1, device=device) - lseg = torch.zeros(1, device=device) + lcls = torch.zeros(1, device=self.device) + lbox = torch.zeros(1, device=self.device) + lobj = torch.zeros(1, device=self.device) + lseg = torch.zeros(1, device=self.device) tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets + # Losses for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx - tobj = torch.zeros_like(pi[..., 0], device=device) # target obj + tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, 32, self.nc), 1) # subset of predictions # Regression - pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pxy = pxy.sigmoid() * 2 - 0.5 + pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss @@ -103,9 +103,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Classification if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, self.nm:], self.cn, device=device) # targets + t = torch.full_like(pcls, self.cn, device=self.device) # targets t[range(n), tcls[i]] = self.cp - lcls += self.BCEcls(ps[:, self.nm:], t) # BCE + lcls += self.BCEcls(pcls, t) # BCE # Mask Regression if tuple(masks.shape[-2:]) != (mask_h, mask_w): @@ -119,7 +119,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model torch.tensor([mask_w, mask_h,
mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) - batch_lseg = torch.zeros(1, device=device) + batch_lseg = torch.zeros(1, device=self.device) for bi in b.unique(): index = b == bi if self.overlap: @@ -132,7 +132,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mw, mh = mws[index], mhs[index] mxyxy = mxyxys[index] - psi = ps[index][:, 5:self.nm] + psi = pmask[index] proto = proto_out[bi] batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) From 226c96ae866e9b3898f6d603bb0a070253e9283f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 17:00:41 +0000 Subject: [PATCH 143/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 7d2880712d29..f01ef1683399 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -2,11 +2,11 @@ import torch.nn as nn import torch.nn.functional as F -from .general import crop, masks_iou from ..general import xywh2xyxy from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import de_parallel +from .general import crop, masks_iou class MaskIOULoss(nn.Module): From 5b52c941126d2b833e25b2f14dfc1db2feab19dc Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 19:12:07 +0200 Subject: [PATCH 144/247] loss cleanup 3 --- utils/segment/loss.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f01ef1683399..719424478621 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -66,9 +66,9 @@ def __init__(self, model, autobalance=False, overlap=False): self.device = device def __call__(self, preds, targets, masks): # predictions, targets, model - p, proto_out = preds # proto_out shape(bs, masks, mask_h, mask_w) - mask_h, mask_w = proto_out.shape[2:] - proto_out = proto_out.permute(0, 2, 3, 1) + p, proto = preds + bs, nm, mask_h, mask_w = proto.shape # proto shape(bs, mask_h, mask_w, num_masks) + proto = proto.permute(0, 2, 3, 1) lcls = torch.zeros(1, device=self.device) lbox = torch.zeros(1, device=self.device) @@ -83,7 +83,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model n = b.shape[0] # number of targets if n: - pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, 32, self.nc), 1) # subset of predictions + pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, nm, self.nc), 1) # subset of predictions # Regression pxy = pxy.sigmoid() * 2 - 0.5 @@ -121,21 +121,16 @@ def __call__(self, preds, targets, masks): # predictions, targets, model batch_lseg = torch.zeros(1, device=self.device) for bi in b.unique(): - index = b == bi + j = b == bi # matching index if self.overlap: - mask_index = tidxs[i][index] - mask_gti = masks[bi][:, :, None].repeat(1, 1, index.sum()) # shape(h,w,n) + mask_index = tidxs[i][j] + mask_gti = masks[bi][:, :, None].repeat(1, 1, j.sum()) # shape(h,w,n) mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) else: - mask_gti = masks[tidxs[i]][index] + mask_gti = masks[tidxs[i]][j] mask_gti = mask_gti.permute(1, 2, 0).contiguous() - mw, mh = mws[index], mhs[index] - mxyxy = mxyxys[index] - psi = pmask[index] - proto = proto_out[bi] - - batch_lseg += self.single_mask_loss(mask_gti, psi, proto, mxyxy, mw, mh) + 
batch_lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) # # update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) From cf40b17375c22b05ca6b644853a4847654cd2126 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 19:24:19 +0200 Subject: [PATCH 145/247] update project names --- segment/train.py | 4 ++-- segment/val.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/segment/train.py b/segment/train.py index 55b9c53a7ef8..223e331db6ab 100644 --- a/segment/train.py +++ b/segment/train.py @@ -499,7 +499,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary def parse_opt(known=False): parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s-seg.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') @@ -522,7 +522,7 @@ def parse_opt(known=False): parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') - parser.add_argument('--project', default=ROOT / 'runs/train_segment', help='save to project/name') + parser.add_argument('--project', default=ROOT / 'runs/train-seg', help='save results to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') diff --git a/segment/val.py b/segment/val.py index d014131b7ddd..ec53f589e8cc 100644 --- a/segment/val.py +++ b/segment/val.py @@ -445,7 +445,7 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model.pt path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=8, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') @@ -460,7 +460,7 @@ def parse_opt(): parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') - parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') + parser.add_argument('--project', default=ROOT / 'runs/val-seg', help='save results to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', 
help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') From dae6549e200096feb49cd47938b299cf00cbcde6 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 21:21:39 +0200 Subject: [PATCH 146/247] Rename -seg yamls from _underscore to -dash --- models/segment/{yolov5l_seg.yaml => yolov5l-seg.yaml} | 0 models/segment/{yolov5m_seg.yaml => yolov5m-seg.yaml} | 0 models/segment/{yolov5n_seg.yaml => yolov5n-seg.yaml} | 0 models/segment/{yolov5s_seg.yaml => yolov5s-seg.yaml} | 0 models/segment/{yolov5x_seg.yaml => yolov5x-seg.yaml} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename models/segment/{yolov5l_seg.yaml => yolov5l-seg.yaml} (100%) rename models/segment/{yolov5m_seg.yaml => yolov5m-seg.yaml} (100%) rename models/segment/{yolov5n_seg.yaml => yolov5n-seg.yaml} (100%) rename models/segment/{yolov5s_seg.yaml => yolov5s-seg.yaml} (100%) rename models/segment/{yolov5x_seg.yaml => yolov5x-seg.yaml} (100%) diff --git a/models/segment/yolov5l_seg.yaml b/models/segment/yolov5l-seg.yaml similarity index 100% rename from models/segment/yolov5l_seg.yaml rename to models/segment/yolov5l-seg.yaml diff --git a/models/segment/yolov5m_seg.yaml b/models/segment/yolov5m-seg.yaml similarity index 100% rename from models/segment/yolov5m_seg.yaml rename to models/segment/yolov5m-seg.yaml diff --git a/models/segment/yolov5n_seg.yaml b/models/segment/yolov5n-seg.yaml similarity index 100% rename from models/segment/yolov5n_seg.yaml rename to models/segment/yolov5n-seg.yaml diff --git a/models/segment/yolov5s_seg.yaml b/models/segment/yolov5s-seg.yaml similarity index 100% rename from models/segment/yolov5s_seg.yaml rename to models/segment/yolov5s-seg.yaml diff --git a/models/segment/yolov5x_seg.yaml b/models/segment/yolov5x-seg.yaml similarity index 100% rename from models/segment/yolov5x_seg.yaml rename to models/segment/yolov5x-seg.yaml From 532958539b8756c434d9e13aa665dcd813895b65 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 23:08:43 +0200 Subject: [PATCH 147/247] prepare for yolov5n-seg.pt --- .github/workflows/ci-testing.yml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index eb05e72e95fd..718dd377eafb 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -126,20 +126,23 @@ jobs: - name: Test segmentation shell: bash # for Windows compatibility run: | - m=${{ matrix.model }}-seg # official weights - b=runs/train-seg/exp/weights/best # best.pt checkpoint - + pip install pycocotools + d='../datasets' # unzip directory f='coco128-segments.zip' rm -rf $d curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f - pip install pycocotools - m=yolov5s-seg # official weights - python segment/predict.py --imgsz 64 --weights $m.pt --device cpu # detect + m=${{ matrix.model }}-seg # official weights + b=runs/train-seg/exp/weights/best # best.pt checkpoint + python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + for d in cpu; do # devices + for w in $m $b; do # weights + python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val + python segment/predict.py --imgsz 64 --weights $w.pt --device $d # predict + done + done python export.py --weights $m.pt --img 64 --include torchscript # export - python segment/train.py --imgsz 64 
--batch 32 --weights '' --cfg yolov5n_seg.yaml --epochs 1 --device cpu # train - - name: Test classification shell: bash # for Windows compatibility run: | From 5c9306984c58f929612406f89a66e39e58618f03 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 23:09:26 +0200 Subject: [PATCH 148/247] precommit space fix --- .github/workflows/ci-testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 718dd377eafb..6494a8cf23f8 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -127,7 +127,7 @@ jobs: shell: bash # for Windows compatibility run: | pip install pycocotools - + d='../datasets' # unzip directory f='coco128-segments.zip' rm -rf $d From 156c01284d751edba14f01c33b19a25b2292f0d9 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Wed, 24 Aug 2022 23:19:17 +0200 Subject: [PATCH 149/247] add coco128-seg.yaml --- .github/workflows/ci-testing.yml | 8 +-- data/coco128-seg.yaml | 101 +++++++++++++++++++++++++++++++ segment/train.py | 2 +- segment/val.py | 8 +-- 4 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 data/coco128-seg.yaml diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 6494a8cf23f8..5be0884991c9 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -126,13 +126,7 @@ jobs: - name: Test segmentation shell: bash # for Windows compatibility run: | - pip install pycocotools - - d='../datasets' # unzip directory - f='coco128-segments.zip' - rm -rf $d - curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f -# && unzip -q $f -d $d && rm $f - + pip install -q pycocotools m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train diff --git a/data/coco128-seg.yaml b/data/coco128-seg.yaml new file mode 100644 index 000000000000..db05534c526e --- /dev/null +++ b/data/coco128-seg.yaml @@ -0,0 +1,101 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# Example usage: python train.py --data coco128.yaml +# parent +# ├── yolov5 +# └── datasets +# └── coco128 ← downloads here (7 MB) + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
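# Editor's note (not part of the patch): alongside each image, a segment dataset such
# as coco128-seg carries a *.txt label file with one row per instance, a class index
# followed by the polygon's normalized xy pairs, e.g. `0 0.681 0.485 0.670 0.487 ...`
# (coordinates here are illustrative).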
+path: ../datasets/coco128-seg # dataset root dir +train: images/train2017 # train images (relative to 'path') 128 images +val: images/train2017 # val images (relative to 'path') 128 images +test: # test images (optional) + +# Classes +names: + 0: person + 1: bicycle + 2: car + 3: motorcycle + 4: airplane + 5: bus + 6: train + 7: truck + 8: boat + 9: traffic light + 10: fire hydrant + 11: stop sign + 12: parking meter + 13: bench + 14: bird + 15: cat + 16: dog + 17: horse + 18: sheep + 19: cow + 20: elephant + 21: bear + 22: zebra + 23: giraffe + 24: backpack + 25: umbrella + 26: handbag + 27: tie + 28: suitcase + 29: frisbee + 30: skis + 31: snowboard + 32: sports ball + 33: kite + 34: baseball bat + 35: baseball glove + 36: skateboard + 37: surfboard + 38: tennis racket + 39: bottle + 40: wine glass + 41: cup + 42: fork + 43: knife + 44: spoon + 45: bowl + 46: banana + 47: apple + 48: sandwich + 49: orange + 50: broccoli + 51: carrot + 52: hot dog + 53: pizza + 54: donut + 55: cake + 56: chair + 57: couch + 58: potted plant + 59: bed + 60: dining table + 61: toilet + 62: tv + 63: laptop + 64: mouse + 65: remote + 66: keyboard + 67: cell phone + 68: microwave + 69: oven + 70: toaster + 71: sink + 72: refrigerator + 73: book + 74: clock + 75: vase + 76: scissors + 77: teddy bear + 78: hair drier + 79: toothbrush + + +# Download script/URL (optional) +download: https://ultralytics.com/assets/coco128-segments.zip diff --git a/segment/train.py b/segment/train.py index 223e331db6ab..ca23b4256317 100644 --- a/segment/train.py +++ b/segment/train.py @@ -501,7 +501,7 @@ def parse_opt(known=False): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s-seg.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300) parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') diff --git a/segment/val.py b/segment/val.py index ec53f589e8cc..884d32ffdf59 100644 --- a/segment/val.py +++ b/segment/val.py @@ -159,7 +159,7 @@ def run( save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file - project=ROOT / 'runs/val', # save to project/name + project=ROOT / 'runs/val-seg', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference @@ -444,10 +444,10 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') - parser.add_argument('--batch-size', type=int, default=8, help='batch size') - parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=320, help='inference size (pixels)') + parser.add_argument('--batch-size', type=int, 
default=32, help='batch size')
+    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
     parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
     parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
     parser.add_argument('--task', default='val', help='train, val, test, speed or study')

From 92a3ff0497d83dcc42178a19f7e2f84029dc6ff2 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Wed, 24 Aug 2022 23:29:20 +0200
Subject: [PATCH 150/247] update coco128-seg comments

---
 data/coco128.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data/coco128.yaml b/data/coco128.yaml
index 12556736a571..d551e994e379 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -1,10 +1,10 @@
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
-# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
 # Example usage: python train.py --data coco128.yaml
 # parent
 # ├── yolov5
 # └── datasets
-# └── coco128 ← downloads here (7 MB)
+# └── coco128-seg ← downloads here (7 MB)


 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]

From e2b07026cc4d89bff802b106d3ad56544119762a Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Wed, 24 Aug 2022 23:38:52 +0200
Subject: [PATCH 151/247] cleanup val.py

---
 segment/val.py | 29 ++++++++++++++++-------------
 val.py         |  2 +-
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index 884d32ffdf59..ac604cb59575 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -1,21 +1,22 @@
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 """
-Validate a trained YOLOv5 model accuracy on a custom dataset
+Validate a trained YOLOv5 segment model on a segment dataset

 Usage:
-    $ python path/to/val.py --weights yolov5s.pt --data coco128.yaml --img 640
+    $ bash data/scripts/get_coco.sh --val --segments  # download COCO-segments val split (1G, 5000 images)
+    $ python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640  # validate COCO-segments

 Usage - formats:
-    $ python path/to/val.py --weights yolov5s.pt                 # PyTorch
-                                      yolov5s.torchscript        # TorchScript
-                                      yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
-                                      yolov5s.xml                # OpenVINO
-                                      yolov5s.engine             # TensorRT
-                                      yolov5s.mlmodel            # CoreML (macOS-only)
-                                      yolov5s_saved_model        # TensorFlow SavedModel
-                                      yolov5s.pb                 # TensorFlow GraphDef
-                                      yolov5s.tflite             # TensorFlow Lite
-                                      yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+    $ python segment/val.py --weights yolov5s-seg.pt             # PyTorch
+                                      yolov5s-seg.torchscript    # TorchScript
+                                      yolov5s-seg.onnx           # ONNX Runtime or OpenCV DNN with --dnn
+                                      yolov5s-seg.xml            # OpenVINO
+                                      yolov5s-seg.engine         # TensorRT
+                                      yolov5s-seg.mlmodel        # CoreML (macOS-only)
+                                      yolov5s-seg_saved_model    # TensorFlow SavedModel
+                                      yolov5s-seg.pb             # TensorFlow GraphDef
+                                      yolov5s-seg.tflite         # TensorFlow Lite
+                                      yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
 """

 import argparse
@@ -478,7 +479,9 @@ def main(opt):
     if opt.task in ('train', 'val', 'test'):  # run normally
         if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
-            LOGGER.info(emojis(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️'))
+            LOGGER.info(f'WARNING: confidence threshold {opt.conf_thres} > 0.001 produces invalid results ⚠️')
+
if opt.save_hybrid: + LOGGER.info('WARNING: --save-hybrid will return high mAP from hybrid labels, not from predictions alone ⚠️') run(**vars(opt)) else: diff --git a/val.py b/val.py index 58b9c9e1bec0..9cacd47b3ab3 100644 --- a/val.py +++ b/val.py @@ -331,7 +331,7 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') From c4e84f1d552f1e86940a4ad030c07836bbc92afe Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:37:01 +0200 Subject: [PATCH 152/247] Major val.py cleanup --- segment/val.py | 200 +++++++++++++++++++------------------------------ val.py | 2 +- 2 files changed, 80 insertions(+), 122 deletions(-) diff --git a/segment/val.py b/segment/val.py index ac604cb59575..ffe0c7cc5148 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,20 +35,24 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative +from models.common import DetectMultiBackend +from models.yolo import DetectionModel +from utils.callbacks import Callbacks import pycocotools.mask as mask_util import torch.nn.functional as F -from models.experimental import attempt_load # scoped to avoid circular import -from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, check_yaml, - coco80_to_coco91_class, colorstr, emojis, increment_path, non_max_suppression, print_args, +from utils.dataloaders import create_dataloader +from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, + coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou from utils.plots import output_to_target, plot_val_study +from utils.torch_utils import smart_inference_mode from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks -from utils.torch_utils import de_parallel, select_device, time_sync +from utils.torch_utils import de_parallel, select_device def save_one_txt(predn, save_conf, shape, file): @@ -66,20 +70,17 @@ def save_one_json(predn, jdict, path, class_map, pred_masks): image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - pred_masks = np.transpose(pred_masks, (2, 0, 1)) rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") - for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): - pred_dict = { + jdict.append({ 'image_id': image_id, 'category_id': class_map[int(p[5])], 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)} - 
pred_dict["segmentation"] = rles[i] - jdict.append(pred_dict) + 'score': round(p[4], 5), + 'segmentation': rles[i]}) def process_batch(detections, labels, iouv): @@ -117,18 +118,9 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): gt_masks = torch.where(gt_masks == index, 1.0, 0.0) if gt_masks.shape[1:] != pred_masks.shape[1:]: - gt_masks = F.interpolate( - gt_masks.unsqueeze(0), - pred_masks.shape[1:], - mode="bilinear", - align_corners=False, - ).squeeze(0) - gt_masks = gt_masks.gt_(0.5) - - iou = mask_iou( - gt_masks.view(gt_masks.shape[0], -1), - pred_masks.view(pred_masks.shape[0], -1), - ) + gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0].gt_(0.5) + + iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1)) x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5])) # IoU above threshold and classes match if x[0].shape[0]: matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()) # [label, detection, iou] @@ -142,7 +134,7 @@ def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap): return correct -@torch.no_grad() +@smart_inference_mode() def run( data, weights=None, # model.pt path(s) @@ -172,6 +164,7 @@ def run( overlap=False, mask_downsample_ratio=1, compute_loss=None, + callbacks=Callbacks(), ): process = process_mask_upsample if save_json else process_mask # Initialize/load model and set device @@ -180,6 +173,7 @@ def run( device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() + nm = de_parallel(model).model[-1].mask_dim # number of masks else: # called directly device = select_device(device, batch_size=batch_size) @@ -188,20 +182,15 @@ def run( (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - # model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) - model = attempt_load(weights, device=device) # load FP32 model - stride = 32 - pt, jit, engine = True, False, False - # stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size - # half = model.fp16 # FP16 supported on limited backends with CUDA - half = device.type != 'cpu' - if half: - model.half() + half = model.fp16 # FP16 supported on limited backends with CUDA + nm = de_parallel(model).model.model[-1].mask_dim if isinstance(model, DetectionModel) else 32 # number of masks if engine: batch_size = model.batch_size else: - # device = model.device + device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') @@ -220,10 +209,10 @@ def run( # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data - ncm = model.nc + ncm = model.model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
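The process_batch_masks() hunk above first brings ground-truth masks onto the predicted-mask grid, then flattens both sides for mask_iou(). A self-contained sketch of that alignment step, assuming float [M, H, W] ground-truth masks and [N, h, w] predicted masks:

# Sketch: resize ground-truth masks to the predicted-mask resolution before IoU
import torch
import torch.nn.functional as F

def align_gt_masks(gt_masks, pred_masks):
    # gt_masks: [M, H, W] float in {0, 1}; pred_masks: [N, h, w]
    if gt_masks.shape[1:] != pred_masks.shape[1:]:
        gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:],
                                 mode='bilinear', align_corners=False)[0]
        gt_masks = gt_masks.gt_(0.5)  # re-binarize after bilinear resampling
    return gt_masks

gt = (torch.rand(2, 640, 640) > 0.5).float()
pred = torch.rand(5, 160, 160)
print(align_gt_masks(gt, pred).shape)  # torch.Size([2, 160, 160])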
- # model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup + model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad = 0.0 if task in ('speed', 'benchmark') else 0.5 rect = False if task == 'benchmark' else pt # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images @@ -241,31 +230,33 @@ def run( seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) - names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} + names = model.names if hasattr(model, 'names') else model.module.names # get class names + if isinstance(names, (list, tuple)): # old format + names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ("%20s" + "%11s" * 10) % ("Class", "Images", "Labels", "Box:{P", "R", "mAP@.5", "mAP@.5:.95}", "Mask:{P", "R", - "mAP@.5", "mAP@.5:.95}") - dt = [0.0, 0.0, 0.0] + s = ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', "R", "mAP50", "mAP50-95)", "Mask(P", "R", + "mAP50", "mAP50-95)") + dt = Profile(), Profile(), Profile() metrics = Metrics() loss = torch.zeros(4, device=device) jdict, stats = [], [] + # callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): - t1 = time_sync() - if cuda: - im = im.to(device, non_blocking=True) - targets = targets.to(device) - masks = masks.to(device) - masks = masks.float() - im = im.half() if half else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 - nb, _, height, width = im.shape # batch size, channels, height, width - t2 = time_sync() - dt[0] += t2 - t1 + # callbacks.run('on_val_batch_start') + with dt[0]: + if cuda: + im = im.to(device, non_blocking=True) + targets = targets.to(device) + masks = masks.to(device) + masks = masks.float() + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + nb, _, height, width = im.shape # batch size, channels, height, width # Inference - out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss outputs - dt[1] += time_sync() - t2 + with dt[1]: + out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss # Loss if compute_loss: @@ -274,19 +265,12 @@ def run( # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling - t3 = time_sync() - out = non_max_suppression(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - masks=de_parallel(model).model[-1].mask_dim) - dt[2] += time_sync() - t3 - - # keep pred masks for plotting - plot_masks = [] + with dt[2]: + out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + masks=nm) + # Metrics + plot_masks = [] # masks for plotting for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions @@ -298,14 +282,16 @@ def run( if npr == 0: if nl: stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) + if plots: + confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) continue - # deal with masks + # Masks midx = [si] if overlap else 
targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, + 1).contiguous().float() # Predictions if single_cls: @@ -322,43 +308,43 @@ def run( correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap) if plots: confusion_matrix.process_batch(predn, labelsn) - stats.append( - (correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) + stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (conf, pcls, tcls) - # convert pred_masks to uint8 pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) if plots and batch_i < 3: - # filter top 15 to plot - plot_masks.append(pred_masks[:15].cpu()) + plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot # Save/log if save_txt: - save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) + save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: pred_masks = scale_masks(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary + # callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: - plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) + plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + # callbacks.run('on_val_batch_end') + # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) metrics.update(results) - nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class - else: - nt = torch.zeros(1) + nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class # Print results - pf = '%20s' + '%11i' * 2 + '%11.3g' * 8 # print format + pf = '%22s' + '%11i' * 2 + '%11.3g' * 8 # print format LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) + if nt.sum() == 0: + LOGGER.warning(f'WARNING: no labels found in {task} set, can not compute metrics without labels ⚠️') # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): @@ -366,7 +352,7 @@ def run( LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) # Print speeds - t = tuple(x / seen * 1E3 for x in dt) # speeds per image + t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) @@ -374,18 +360,10 @@ def run( # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) + # callbacks.run('on_val_end') + + mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results() - # in case the cocoeval will update map - ( - mp_bbox, - 
mr_bbox, - map50_bbox, - map_bbox, - mp_mask, - mr_mask, - map50_mask, - map_mask, - ) = metrics.mean_results() # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights @@ -402,22 +380,15 @@ def run( anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api - eval_bbox = COCOeval(anno, pred, 'bbox') - eval_mask = COCOeval(anno, pred, 'segm') - if is_coco: - eval_bbox.params.imgIds = [int(Path(x).stem) - for x in dataloader.dataset.im_files] # image IDs to evaluate - eval_mask.params.imgIds = [int(Path(x).stem) - for x in dataloader.dataset.im_files] # image IDs to evaluate - eval_bbox.evaluate() - eval_bbox.accumulate() - eval_bbox.summarize() - map_bbox, map50_bbox = eval_bbox.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) - - eval_mask.evaluate() - eval_mask.accumulate() - eval_mask.summarize() - map_mask, map50_mask = eval_mask.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + results = [] + for eval in COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm'): + if is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # img ID to evaluate + eval.evaluate() + eval.accumulate() + eval.summarize() + results.extend(eval.stats[:2]) # update results (mAP@0.5:0.95, mAP@0.5) + map_bbox, map50_bbox, map_mask, map50_mask = results except Exception as e: LOGGER.info(f'pycocotools unable to run: {e}') @@ -426,21 +397,8 @@ def run( if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") - final_metric = ( - mp_bbox, - mr_bbox, - map50_bbox, - map_bbox, - mp_mask, - mr_mask, - map50_mask, - map_mask, - ) - return ( - (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), - metrics.get_maps(nc), - t, - ) + final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask + return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t def parse_opt(): diff --git a/val.py b/val.py index 9cacd47b3ab3..d120b625e474 100644 --- a/val.py +++ b/val.py @@ -186,7 +186,7 @@ def run( if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) - s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') + s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95') dt, p, r, f1, mp, mr, map50, map = (Profile(), Profile(), Profile()), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] From 4710a400b933aabad3921ca7b73514a453957299 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 22:37:31 +0000 Subject: [PATCH 153/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/segment/val.py b/segment/val.py index ffe0c7cc5148..407d061b6d3a 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,24 +35,23 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.common import DetectMultiBackend -from models.yolo import DetectionModel -from 
utils.callbacks import Callbacks import pycocotools.mask as mask_util import torch.nn.functional as F +from models.common import DetectMultiBackend +from models.yolo import DetectionModel +from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou from utils.plots import output_to_target, plot_val_study -from utils.torch_utils import smart_inference_mode from utils.segment.dataloaders import create_dataloader from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks -from utils.torch_utils import de_parallel, select_device +from utils.torch_utils import de_parallel, select_device, smart_inference_mode def save_one_txt(predn, save_conf, shape, file): @@ -266,7 +265,12 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, + out = non_max_suppression(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, masks=nm) # Metrics @@ -290,8 +294,8 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).permute(2, 0, - 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], + shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() # Predictions if single_cls: From 744058e026e9c27b5ff7489277dfc094f3759d41 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:42:41 +0200 Subject: [PATCH 154/247] precommit fix --- segment/val.py | 1 - 1 file changed, 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index ffe0c7cc5148..3840d7c8fd0d 100644 --- a/segment/val.py +++ b/segment/val.py @@ -41,7 +41,6 @@ import pycocotools.mask as mask_util import torch.nn.functional as F -from utils.dataloaders import create_dataloader from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) From 858db3477bda367b80b06fe46b2aeb349848fd48 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:43:40 +0200 Subject: [PATCH 155/247] precommit fix --- segment/val.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index 76c1b60625a9..ec5070bcc7c4 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,9 +35,6 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import pycocotools.mask as mask_util -import torch.nn.functional as F - from models.common import DetectMultiBackend from models.yolo import DetectionModel from utils.callbacks import Callbacks From 1a00dda1eba1b59d6274901ef780750cda445e17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Aug 2022 
22:44:18 +0000 Subject: [PATCH 156/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index ec5070bcc7c4..57cf60907715 100644 --- a/segment/val.py +++ b/segment/val.py @@ -35,12 +35,12 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.common import DetectMultiBackend -from models.yolo import DetectionModel -from utils.callbacks import Callbacks import pycocotools.mask as mask_util import torch.nn.functional as F +from models.common import DetectMultiBackend +from models.yolo import DetectionModel +from utils.callbacks import Callbacks from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) From 6820f84af5992805701c98705e045ce46eec6999 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:57:20 +0200 Subject: [PATCH 157/247] optional pycocotools --- segment/val.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/segment/val.py b/segment/val.py index ec5070bcc7c4..22cff1e67403 100644 --- a/segment/val.py +++ b/segment/val.py @@ -38,7 +38,6 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel from utils.callbacks import Callbacks -import pycocotools.mask as mask_util import torch.nn.functional as F from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, @@ -65,11 +64,12 @@ def save_one_txt(predn, save_conf, shape, file): def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + from pycocotools.mask import encode image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [mask_util.encode(np.asarray(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in pred_masks] + rles = [encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] for x in pred_masks] for rle in rles: rle["counts"] = rle["counts"].decode("utf-8") for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): @@ -164,7 +164,12 @@ def run( compute_loss=None, callbacks=Callbacks(), ): - process = process_mask_upsample if save_json else process_mask + if save_json: + check_requirements(['pycocotools']) + process = process_mask_upsample # more accurate + else: + process = process_mask # faster + # Initialize/load model and set device training = model is not None if training: # called by train.py @@ -377,7 +382,6 @@ def run( json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb - check_requirements(['pycocotools']) from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval From 114485bd6a1fe32457c1d9f023196f907dcbca21 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 00:59:05 +0200 Subject: [PATCH 158/247] remove CI pip install pycocotools (auto-installed now) --- .github/workflows/ci-testing.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 
5be0884991c9..044ece544648 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -126,7 +126,6 @@ jobs: - name: Test segmentation shell: bash # for Windows compatibility run: | - pip install -q pycocotools m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train From 0acd727a323043b4bcbd0e08340fd38b064c8456 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 01:11:47 +0200 Subject: [PATCH 159/247] seg yaml fix --- data/coco128-seg.yaml | 4 ++-- data/coco128.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/coco128-seg.yaml b/data/coco128-seg.yaml index db05534c526e..a0319670a92e 100644 --- a/data/coco128-seg.yaml +++ b/data/coco128-seg.yaml @@ -1,10 +1,10 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license -# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Example usage: python train.py --data coco128.yaml # parent # ├── yolov5 # └── datasets -# └── coco128 ← downloads here (7 MB) +# └── coco128-seg ← downloads here (7 MB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] diff --git a/data/coco128.yaml b/data/coco128.yaml index d551e994e379..12556736a571 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -1,10 +1,10 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license -# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Example usage: python train.py --data coco128.yaml # parent # ├── yolov5 # └── datasets -# └── coco128-seg ← downloads here (7 MB) +# └── coco128 ← downloads here (7 MB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
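Patch 157 above defers the pycocotools import into save_one_json() so the dependency is only pulled in when --save-json is set. A sketch of the RLE round-trip that function performs per predicted mask, assuming pycocotools is installed and the mask is already binary:

# Sketch: run-length-encode one binary mask for a COCO-JSON 'segmentation' field
import numpy as np
from pycocotools.mask import decode, encode

mask = (np.random.rand(160, 160) > 0.5).astype(np.uint8)  # [H, W] binary mask (illustrative)
rles = encode(np.asarray(mask[:, :, None], order='F', dtype='uint8'))  # Fortran-order [H, W, 1]
assert (decode(rles)[:, :, 0] == mask).all()  # encoding is lossless
rle = rles[0]
rle['counts'] = rle['counts'].decode('utf-8')  # bytes -> str so json.dump() can serialize it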
From 2c9719dcdd1a1cdce8cebc22a79b22a95ec5a4de Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 03:08:17 +0200 Subject: [PATCH 160/247] optimize mask_iou() and masks_iou() --- utils/segment/general.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index cc8cc2997541..b97d289f09c1 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -102,7 +102,7 @@ def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): return masks -def mask_iou(mask1, mask2): +def mask_iou(mask1, mask2, eps=1e-7): """ mask1: [N, n] m1 means number of predicted objects mask2: [M, n] m2 means number of gt objects @@ -110,17 +110,12 @@ def mask_iou(mask1, mask2): return: masks iou, [N, M] """ - # print(mask1.shape) - # print(mask2.shape) intersection = torch.matmul(mask1, mask2.t()).clamp(0) - area1 = torch.sum(mask1, dim=1).view(1, -1) - area2 = torch.sum(mask2, dim=1).view(1, -1) - union = (area1.t() + area2) - intersection + union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection + return intersection / (union + eps) - return intersection / (union + 1e-7) - -def masks_iou(mask1, mask2): +def masks_iou(mask1, mask2, eps=1e-7): """ mask1: [N, n] m1 means number of predicted objects mask2: [N, n] m2 means number of gt objects @@ -129,7 +124,5 @@ def masks_iou(mask1, mask2): return: masks iou, (N, ) """ intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) - area1 = torch.sum(mask1, dim=1).view(1, -1) - area2 = torch.sum(mask2, dim=1).view(1, -1) - union = (area1 + area2) - intersection - return intersection / (union + 1e-7) + union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection + return intersection / (union + eps) From 4863beb745417e70d30ef905a8f0b7301077f870 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 15:03:17 +0200 Subject: [PATCH 161/247] threaded fix --- utils/segment/plots.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 4517ff455cba..eac46d9853aa 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -8,7 +8,8 @@ import pandas as pd import torch -from ..general import threaded, xywh2xyxy +from .. import threaded +from ..general import xywh2xyxy from ..plots import Annotator, colors From 347968b38bca8d4baf5926a66f3cb64d3b4c4ce7 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 18:00:10 +0200 Subject: [PATCH 162/247] Major train.py update --- segment/train.py | 283 ++++++++++++++++++++++------------------------- 1 file changed, 135 insertions(+), 148 deletions(-) diff --git a/segment/train.py b/segment/train.py index ca23b4256317..b9f284b33eea 100644 --- a/segment/train.py +++ b/segment/train.py @@ -1,15 +1,18 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ -Train a YOLOv5 model on a custom dataset. - +Train a YOLOv5 segment model on a segment dataset Models and datasets download automatically from the latest YOLOv5 release. 
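The rewritten mask_iou() in the patch above computes every pairwise intersection with a single matrix product over flattened binary masks, replacing the explicit area tensors with broadcast sums. A quick numeric check that the broadcast form matches a naive pairwise loop (assumes binary float inputs):

# Sketch: verify the matmul form of mask_iou() against a naive reference
import torch

def mask_iou(mask1, mask2, eps=1e-7):
    intersection = torch.matmul(mask1, mask2.t()).clamp(0)  # [N, M] shared pixels
    union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection
    return intersection / (union + eps)

m1 = (torch.rand(3, 64) > 0.5).float()  # N=3 predicted masks, flattened
m2 = (torch.rand(2, 64) > 0.5).float()  # M=2 ground-truth masks, flattened
iou = mask_iou(m1, m2)
for i in range(3):
    for j in range(2):
        inter = (m1[i] * m2[j]).sum()
        assert torch.isclose(iou[i, j], inter / (m1[i].sum() + m2[j].sum() - inter + 1e-7))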
-Models: https://github.com/ultralytics/yolov5/tree/master/models -Datasets: https://github.com/ultralytics/yolov5/tree/master/data -Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data -Usage: - $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED) - $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch +Usage - Single-GPU training: + $ python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 # from pretrained (recommended) + $ python segment/train.py --data coco128-seg.yaml --weights '' --cfg yolov5s-seg.yaml --img 640 # from scratch + +Usage - Multi-GPU DDP training: + $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 --device 0,1,2,3 + +Models: https://github.com/ultralytics/yolov5/tree/master/models +Datasets: https://github.com/ultralytics/yolov5/tree/master/data +Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data """ import argparse @@ -19,58 +22,54 @@ import sys import time from copy import deepcopy +from datetime import datetime from pathlib import Path import numpy as np import torch import torch.distributed as dist import torch.nn as nn -import torch.nn.functional as F -from torch.nn.parallel import DistributedDataParallel as DDP -from torch.optim import SGD, Adam, lr_scheduler +import yaml +from torch.optim import lr_scheduler from tqdm import tqdm -import val # for end-of-epoch mAP - FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative +import torch.nn.functional as F +import segment.val as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors -from utils.downloads import attempt_download -from utils.general import (check_dataset, check_file, check_git_status, check_img_size, check_requirements, - check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, - intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, - print_mutation, strip_optimizer) +from utils.autobatch import check_train_batch_size +from utils.callbacks import Callbacks +from utils.downloads import attempt_download, is_url +from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size, + check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, + init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, + print_args, print_mutation, strip_optimizer, yaml_save) from utils.loggers import GenericLogger -from utils.plots import plot_evolve, plot_labels from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss -from utils.segment.metrics import fitness -from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from utils.segment.metrics import fitness, KEYS +from utils.segment.plots import plot_images_and_masks, plot_results_with_masks +from utils.plots import plot_evolve, plot_labels +from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, + smart_resume, torch_distributed_zero_first) LOCAL_RANK = 
int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -from datetime import datetime - -import yaml -from torch.optim import AdamW - -from utils.autobatch import check_train_batch_size -from utils.general import LOGGER, check_amp, check_version -from utils.segment.metrics import KEYS -from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary +def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio + # callbacks.run('on_pretrain_routine_start') # Directories w = save_dir / 'weights' # weights dir @@ -82,18 +81,28 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + opt.hyp = hyp.copy() # for saving hyps to checkpoints # Save run settings if not evolve: - with open(save_dir / 'hyp.yaml', 'w') as f: - yaml.safe_dump(hyp, f, sort_keys=False) - with open(save_dir / 'opt.yaml', 'w') as f: - yaml.safe_dump(vars(opt), f, sort_keys=False) + yaml_save(save_dir / 'hyp.yaml', hyp) + yaml_save(save_dir / 'opt.yaml', vars(opt)) # Loggers data_dict = None if RANK in {-1, 0}: logger = GenericLogger(opt=opt, console_logger=LOGGER) + # loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance + # if loggers.clearml: + # data_dict = loggers.clearml.data_dict # None if no ClearML dataset or filled in by ClearML + # if loggers.wandb: + # data_dict = loggers.wandb.data_dict + # if resume: + # weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size + # + # # Register actions + # for k in methods(loggers): + # callbacks.register_action(k, callback=getattr(loggers, k)) # Config plots = not evolve and not opt.noplots # create plots @@ -104,8 +113,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] nc = 1 if single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names - assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check + names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset # Model @@ -129,6 +137,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary freeze = [f'model.{x}.' 
for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze for k, v in model.named_parameters(): v.requires_grad = True # train all layers + # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results) if any(x in k for x in freeze): LOGGER.info(f'freezing {k}') v.requires_grad = False @@ -141,35 +150,13 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) logger.update_params({"batch_size": batch_size}) + # loggers.on_params_update({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay - LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") - - g = [], [], [] # optimizer parameter groups - bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d() - for v in model.modules(): - if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias - g[2].append(v.bias) - if isinstance(v, bn): # weight (no decay) - g[1].append(v.weight) - elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) - g[0].append(v.weight) - - if opt.optimizer == 'Adam': - optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - elif opt.optimizer == 'AdamW': - optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - else: - optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - - optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']}) # add g0 with weight_decay - optimizer.add_param_group({'params': g[1]}) # add g1 (BatchNorm2d weights) - LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " - f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias") - del g + optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay']) # Scheduler if opt.cos_lr: @@ -182,26 +169,10 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary ema = ModelEMA(model) if RANK in {-1, 0} else None # Resume - start_epoch, best_fitness = 0, 0.0 + best_fitness, start_epoch = 0.0, 0 if pretrained: - # Optimizer - if ckpt['optimizer'] is not None: - optimizer.load_state_dict(ckpt['optimizer']) - best_fitness = ckpt['best_fitness'] - - # EMA - if ema and ckpt.get('ema'): - ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) - ema.updates = ckpt['updates'] - - # Epochs - start_epoch = ckpt['epoch'] + 1 if resume: - assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' - if epochs < start_epoch: - LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. 
Fine-tuning for {epochs} more epochs.") - epochs += ckpt['epoch'] # finetune additional epochs - + best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume) del ckpt, csd # DP mode @@ -235,9 +206,8 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary mask_downsample_ratio=mask_ratio, overlap_mask=overlap, ) - mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class - print("mlc , nc ", mlc, " ", nc) - nb = len(train_loader) # number of batches + labels = np.concatenate(dataset.labels, 0) + mlc = int(labels[:, 0].max()) # max label class assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' # Process 0 @@ -258,24 +228,17 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary prefix=colorstr('val: '))[0] if not resume: - labels = np.concatenate(dataset.labels, 0) - # c = torch.tensor(labels[:, 0]) # classes - # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency - # model._initialize_biases(cf.to(device)) - if plots: - plot_labels(labels, names, save_dir) - - # Anchors if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # run AutoAnchor model.half().float() # pre-reduce anchor precision + if plots: + plot_labels(labels, names, save_dir) + # callbacks.run('on_pretrain_routine_end', labels, names) + # DDP mode if cuda and RANK != -1: - if check_version(torch.__version__, '1.11.0'): - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) - else: - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + model = smart_DDP(model) # Model attributes nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) @@ -290,6 +253,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Start training t0 = time.time() + nb = len(train_loader) # number of batches nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 @@ -299,11 +263,13 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model, overlap=overlap) # init loss class + # callbacks.run('on_train_start') LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + # callbacks.run('on_train_epoch_start') model.train() # Update image weights (optional, single-GPU only) @@ -315,16 +281,18 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders + mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) - LOGGER.info(("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "seg", "obj", "cls", "labels", "img_size")) + LOGGER.info(('\n' + '%11s' * 
8) % + ('Epoch', 'GPU_mem', 'box_loss', 'seg_loss', 'obj_loss', 'cls_loss', 'Instances', 'Size')) if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _, - masks) in pbar: # batch ------------------------------------------------------------- + for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------ + # callbacks.run('on_train_batch_start') ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 @@ -350,8 +318,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device), - masks=masks.to(device).float()) # loss scaled by batch_size + loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: @@ -360,8 +327,10 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary # Backward scaler.scale(loss).backward() - # Optimize + # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html if ni - last_opt_step >= accumulate: + scaler.unscale_(optimizer) # unscale gradients + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() @@ -373,21 +342,18 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) - pbar.set_description(("%10s" * 2 + "%10.4g" * 6) % - (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])) - # for plots + pbar.set_description(('%11s' * 2 + '%11.4g' * 6) % + (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])) + # callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths) + # if callbacks.stop_training: + # return + + # Mosaic plots if mask_ratio != 1: - masks = F.interpolate( - masks[None, :].float(), - (imgsz, imgsz), - mode="bilinear", - align_corners=False, - ).squeeze(0) + masks = F.interpolate(masks[None].float(), (imgsz, imgsz), mode="bilinear", align_corners=False)[0] if plots: if ni < 3: - f = save_dir / f"train_batch{ni}.jpg" # filename - plot_images_and_masks(imgs, targets, masks, paths, f) - + plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg") if ni == 10: files = sorted(save_dir.glob('train*.jpg')) logger.log_images(files, "Mosaics", epoch) @@ -399,32 +365,38 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary if RANK in {-1, 0}: # mAP + # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP - results, maps, _ = val.run(data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz, - model=ema.ema, - single_cls=single_cls, - dataloader=val_loader, - save_dir=save_dir, - plots=plots, - compute_loss=compute_loss, - mask_downsample_ratio=mask_ratio, - overlap=overlap) + results, maps, _ = 
validate.run(data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + half=amp, + model=ema.ema, + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + plots=False, + callbacks=callbacks, + compute_loss=compute_loss, + mask_downsample_ratio=mask_ratio, + overlap=overlap) + # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr + # callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Log val metrics and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) if plots: files = sorted(save_dir.glob('val*.jpg')) logger.log_images(files, "Validation", epoch) + # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { @@ -435,6 +407,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary 'updates': ema.updates, 'optimizer': optimizer.state_dict(), # 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None, + 'opt': vars(opt), 'date': datetime.now().isoformat()} # Save last, best and delete @@ -445,6 +418,7 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary torch.save(ckpt, w / f'epoch{epoch}.pt') logger.log_model(w / f'epoch{epoch}.pt') del ckpt + # callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) # EarlyStopping if RANK != -1: # if DDP training @@ -464,24 +438,28 @@ def train(hyp, opt, device): # hyp is path/to/hyp.yaml or hyp dictionary strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') - results, _, _ = val.run( + results, _, _ = validate.run( data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=attempt_load(f, device).half(), - iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 + iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65 single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, save_json=is_coco, verbose=True, plots=plots, + callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: + # callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) logger.log_metrics(metrics_dict, epoch) + + # callbacks.run('on_train_end', last, best, epoch, results) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs + 1) if not opt.evolve: @@ -503,7 +481,7 @@ def parse_opt(known=False): parser.add_argument('--cfg', type=str, default='', help='model.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') - parser.add_argument('--epochs', type=int, default=300) + parser.add_argument('--epochs', type=int, default=300, help='total training epochs') parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--rect', action='store_true', help='rectangular training') @@ -522,7 +500,7 @@ def parse_opt(known=False): 
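The inline resume block deleted earlier in this patch now lives behind the single smart_resume() call. Roughly what that helper restores, assuming a checkpoint dict with the keys this patch saves (epoch, best_fitness, optimizer, ema, updates) — a sketch, not the exact utils.torch_utils implementation:

# Sketch: what smart_resume() restores from a checkpoint (mirrors the removed inline code)
def smart_resume(ckpt, optimizer, ema=None, weights='yolov5s-seg.pt', epochs=300, resume=True):
    best_fitness, start_epoch = 0.0, ckpt['epoch'] + 1
    if ckpt.get('optimizer') is not None:
        optimizer.load_state_dict(ckpt['optimizer'])  # restore optimizer state
        best_fitness = ckpt['best_fitness']
    if ema and ckpt.get('ema'):
        ema.ema.load_state_dict(ckpt['ema'].float().state_dict())  # restore EMA weights
        ema.updates = ckpt['updates']
    if resume:
        assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
    if epochs < start_epoch:
        epochs += ckpt['epoch']  # fine-tune for `epochs` more epochs past the saved count
    return best_fitness, start_epoch, epochs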
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') - parser.add_argument('--project', default=ROOT / 'runs/train-seg', help='save results to project/name') + parser.add_argument('--project', default=ROOT / 'runs/train-seg', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') @@ -535,30 +513,39 @@ def parse_opt(known=False): parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') # Instance Segmentation Args - parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the gt masks to saving memory') - parser.add_argument('--no-overlap', - action='store_true', - help='Overlapping masks train faster at the cost of slight accuray decrease') + parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the truth masks to saving memory') + parser.add_argument('--no-overlap', action='store_true', help='Overlap masks train faster at slightly less mAP') - opt = parser.parse_known_args()[0] if known else parser.parse_args() - return opt + # Weights & Biases arguments + # parser.add_argument('--entity', default=None, help='W&B: Entity') + # parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') + # parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') + # parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') + return parser.parse_known_args()[0] if known else parser.parse_args() -def main(opt): + +def main(opt, callbacks=Callbacks()): # Checks if RANK in {-1, 0}: print_args(vars(opt)) check_git_status() - check_requirements(exclude=['thop']) + check_requirements() # Resume - if opt.resume and not opt.evolve: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' - with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: - opt = argparse.Namespace(**yaml.safe_load(f)) # replace - opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate - LOGGER.info(f'Resuming training from {ckpt}') + if opt.resume and not opt.evolve: # resume from specified or most recent last.pt + last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run()) + opt_yaml = last.parent.parent / 'opt.yaml' # train options yaml + opt_data = opt.data # original dataset + if opt_yaml.is_file(): + with open(opt_yaml, errors='ignore') as f: + d = yaml.safe_load(f) + else: + d = torch.load(last, map_location='cpu')['opt'] + opt = argparse.Namespace(**d) # replace + opt.cfg, opt.weights, opt.resume = '', str(last), True # reinstate + if is_url(opt_data): + opt.data = check_file(opt_data) # avoid HUB resume auth timeout else: opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # 
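The reworked resume block prefers the opt.yaml written beside the run and falls back to the 'opt' dict that this series now stores inside the checkpoint itself (see the ckpt changes earlier in the patch). The same logic isolated as a function; the default path is only an example:

    import argparse
    from pathlib import Path

    import torch
    import yaml

    def load_resume_opt(last='runs/train-seg/exp/weights/last.pt'):
        last = Path(last)
        opt_yaml = last.parent.parent / 'opt.yaml'       # train options yaml
        if opt_yaml.is_file():
            with open(opt_yaml, errors='ignore') as f:
                d = yaml.safe_load(f)
        else:                                            # fall back to ckpt['opt']
            d = torch.load(last, map_location='cpu')['opt']
        opt = argparse.Namespace(**d)
        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
        return opt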
checks @@ -586,10 +573,7 @@ def main(opt): # Train if not opt.evolve: - train(opt.hyp, opt, device) - if WORLD_SIZE > 1 and RANK == 0: - LOGGER.info('Destroying process group... ') - dist.destroy_process_group() + train(opt.hyp, opt, device, callbacks) # Evolve hyperparameters (optional) else: @@ -629,6 +613,8 @@ def main(opt): hyp = yaml.safe_load(f) # load hyps dict if 'anchors' not in hyp: # anchors commented in hyp.yaml hyp['anchors'] = 3 + if opt.noautoanchor: + del hyp['anchors'], meta['anchors'] opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' @@ -668,7 +654,8 @@ def main(opt): hyp[k] = round(hyp[k], 5) # significant digits # Train mutation - results = train(hyp.copy(), opt, device) + results = train(hyp.copy(), opt, device, callbacks) + callbacks = Callbacks() # Write mutation results print_mutation(results, hyp.copy(), save_dir, opt.bucket) From 5956e7dae3912aa268b7ab7fbf12a10a98d328ee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 25 Aug 2022 16:00:42 +0000 Subject: [PATCH 163/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/train.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/segment/train.py b/segment/train.py index b9f284b33eea..ebeebf0c1eaa 100644 --- a/segment/train.py +++ b/segment/train.py @@ -40,6 +40,7 @@ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative import torch.nn.functional as F + import segment.val as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model @@ -52,11 +53,11 @@ init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer, yaml_save) from utils.loggers import GenericLogger +from utils.plots import plot_evolve, plot_labels from utils.segment.dataloaders import create_dataloader from utils.segment.loss import ComputeLoss -from utils.segment.metrics import fitness, KEYS +from utils.segment.metrics import KEYS, fitness from utils.segment.plots import plot_images_and_masks, plot_results_with_masks -from utils.plots import plot_evolve, plot_labels from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, smart_resume, torch_distributed_zero_first) From 252b8b32078e8f761385238813971e00653ffe93 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 25 Aug 2022 20:43:54 +0200 Subject: [PATCH 164/247] Major segments/val/process_batch() update --- segment/val.py | 54 +++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/segment/val.py b/segment/val.py index 21a0b8f0c4fc..8997a5d9e119 100644 --- a/segment/val.py +++ b/segment/val.py @@ -81,17 +81,29 @@ def save_one_json(predn, jdict, path, class_map, pred_masks): 'segmentation': rles[i]}) -def process_batch(detections, labels, iouv): +def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False): """ - Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. 
+    Return correct prediction matrix
     Arguments:
-        detections (array[N, 6]), x1, y1, x2, y2, conf, class
-        labels (array[M, 5]), class, x1, y1, x2, y2
+        detections (array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (array[M, 5]), class, x1, y1, x2, y2
     Returns:
-        correct (array[N, 10]), for 10 IoU levels
+        correct (array[N, 10]), for 10 IoU levels
     """
-    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
-    iou = box_iou(labels[:, 1:], detections[:, :4])
+    if masks:
+        if overlap:
+            nl = len(labels)
+            index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
+            gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
+            gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
+        if gt_masks.shape[1:] != pred_masks.shape[1:]:
+            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
+            gt_masks = gt_masks.gt_(0.5)
+        iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
+    else:  # boxes
+        iou = box_iou(labels[:, 1:], detections[:, :4])
+
+    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
     correct_class = labels[:, 0:1] == detections[:, 5]
     for i in range(len(iouv)):
         x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
@@ -103,32 +115,6 @@ def process_batch(detections, labels, iouv):
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
-
-
-def process_batch_masks(predn, pred_masks, gt_masks, labels, iouv, overlap):
-    correct = torch.zeros(predn.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
-    # convert masks (1, 640, 640) -> (n, 640, 640)
-    if overlap:
-        nl = len(labels)
-        index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
-        gt_masks = gt_masks.repeat(nl, 1, 1)
-        gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
-
-    if gt_masks.shape[1:] != pred_masks.shape[1:]:
-        gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0].gt_(0.5)
-
-    iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
-    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == predn[:, 5]))  # IoU above threshold and classes match
-    if x[0].shape[0]:
-        matches = (torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy())  # [label, detection, iou]
-        if x[0].shape[0] > 1:
-            matches = matches[matches[:, 2].argsort()[::-1]]
-            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
-            # matches = matches[matches[:, 2].argsort()[::-1]]
-            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
-        matches = torch.Tensor(matches).to(iouv.device)
-        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
     return correct
 
 
@@ -313,7 +299,7 @@ def run(
                 scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                 labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                 correct_bboxes = process_batch(predn, labelsn, iouv)
-                correct_masks = process_batch_masks(predn, pred_masks, gt_masks, labelsn, iouv, overlap=overlap)
+                correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True)
                 if plots:
                     confusion_matrix.process_batch(predn, labelsn)
                 stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0]))  # (conf, pcls, tcls)
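The unified mask branch is worth replaying on toy tensors: "overlap" ground-truth masks pack every instance of an image into one (H,W) map whose pixel values are instance ids, IoU is computed on flattened binary masks, and matching is the same greedy one-to-one dedup as for boxes. mask_iou below is a minimal stand-in for utils.segment.metrics.mask_iou, not the repo source:

    import numpy as np
    import torch

    def mask_iou(m1, m2, eps=1e-7):  # m1 (N,HW), m2 (M,HW), binary float masks
        inter = m1 @ m2.T
        union = m1.sum(1)[:, None] + m2.sum(1)[None] - inter
        return inter / (union + eps)

    gt = torch.tensor([[0., 1., 1., 0.],
                       [2., 2., 0., 0.]])          # one (H,W) map, instance ids 1..2
    nl = 2
    index = torch.arange(nl).view(nl, 1, 1) + 1    # (nl,1,1) ids to compare against
    gt_masks = torch.where(gt.repeat(nl, 1, 1) == index, 1.0, 0.0)  # (nl,H,W) binary
    pred_masks = gt_masks.flip(0)                  # two "predictions", swapped order
    iou = mask_iou(gt_masks.view(nl, -1), pred_masks.view(nl, -1))

    x = torch.where(iou >= 0.5)                    # class check omitted for brevity
    matches = torch.cat((torch.stack(x, 1).float(), iou[x[0], x[1]][:, None]), 1).numpy()
    matches = matches[matches[:, 2].argsort()[::-1]]                   # best IoU first
    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # one label per detection
    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # one detection per label
    print(matches)  # [[0. 1. 1.] [1. 0. 1.]]: label i matched to detection 1-i at IoU 1.0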
From 0d08e0e8f26558b9ae0d59e8378632b27eefe8e7 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Thu, 25 Aug 2022 21:11:11 +0200
Subject: [PATCH 165/247] yolov5/val updates from segment

---
 val.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index d120b625e474..3b17fdeeb8e1 100644
--- a/val.py
+++ b/val.py
@@ -70,12 +70,12 @@ def save_one_json(predn, jdict, path, class_map):
 def process_batch(detections, labels, iouv):
     """
-    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+    Return correct prediction matrix
     Arguments:
-        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
-        labels (Array[M, 5]), class, x1, y1, x2, y2
+        detections (array[N, 6]), x1, y1, x2, y2, conf, class
+        labels (array[M, 5]), class, x1, y1, x2, y2
     Returns:
-        correct (Array[N, 10]), for 10 IoU levels
+        correct (array[N, 10]), for 10 IoU levels
     """
     correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
     iou = box_iou(labels[:, 1:], detections[:, :4])
@@ -90,7 +90,7 @@ def process_batch(detections, labels, iouv):
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+    return correct
 
 
 @smart_inference_mode()

From 865dfa7582fd123b34df1e471463d453f3e545d0 Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Thu, 25 Aug 2022 21:26:43 +0200
Subject: [PATCH 166/247] process_batch numpy/tensor fix

---
 segment/val.py | 4 ++--
 val.py         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/segment/val.py b/segment/val.py
index 8997a5d9e119..87868f755b28 100644
--- a/segment/val.py
+++ b/segment/val.py
@@ -103,7 +103,7 @@ def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, over
     else:  # boxes
         iou = box_iou(labels[:, 1:], detections[:, :4])
 
-    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
+    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
     correct_class = labels[:, 0:1] == detections[:, 5]
     for i in range(len(iouv)):
         x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
@@ -115,7 +115,7 @@ def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, over
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return correct
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
 
 
 @smart_inference_mode()
diff --git a/val.py b/val.py
index 3b17fdeeb8e1..7edf70ed5817 100644
--- a/val.py
+++ b/val.py
@@ -90,7 +90,7 @@ def process_batch(detections, labels, iouv):
             # matches = matches[matches[:, 2].argsort()[::-1]]
             matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
             correct[matches[:, 1].astype(int), i] = True
-    return correct
+    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
 
 
 @smart_inference_mode()

From 43ca2f293713f2f5db964513d15377e7c0eeb3da Mon Sep 17 00:00:00 2001
From: glennjocher
Date: Thu, 25 Aug 2022 23:57:21 +0200
Subject: [PATCH 167/247] opt-in to pycocotools with --save-json

---
 data/coco128-seg.yaml | 2 +-
 segment/val.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/data/coco128-seg.yaml b/data/coco128-seg.yaml
index a0319670a92e..5e81910cc456 100644
--- a/data/coco128-seg.yaml
+++ b/data/coco128-seg.yaml
@@ -98,4 +98,4 @@ names:
 
 # Download 
script/URL (optional) -download: https://ultralytics.com/assets/coco128-segments.zip +download: https://ultralytics.com/assets/coco128-seg.zip diff --git a/segment/val.py b/segment/val.py index 87868f755b28..bb08383890fc 100644 --- a/segment/val.py +++ b/segment/val.py @@ -419,7 +419,7 @@ def parse_opt(): parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML - opt.save_json |= opt.data.endswith('coco.yaml') + # opt.save_json |= opt.data.endswith('coco.yaml') opt.save_txt |= opt.save_hybrid print_args(vars(opt)) return opt From 8c9f9069aed73f2512faf8ee0f266a806787e744 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 02:15:21 +0200 Subject: [PATCH 168/247] threaded pycocotools ops for 2x speed increase --- segment/val.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/segment/val.py b/segment/val.py index bb08383890fc..7640a59904d2 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,6 +39,7 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel +from utils import threaded from utils.callbacks import Callbacks from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, @@ -62,6 +63,7 @@ def save_one_txt(predn, save_conf, shape, file): f.write(('%g ' * len(line)).rstrip() % line + '\n') +@threaded def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} from pycocotools.mask import encode From db545f97ec18ead7cf04fac5ab5d335bf551fd74 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 02:44:27 +0200 Subject: [PATCH 169/247] Avoid permute contiguous if possible --- utils/segment/general.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index b97d289f09c1..2044d3f018ac 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -60,10 +60,11 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 1] *= mh / ih masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes) # HWC - masks = masks.permute(2, 0, 1).contiguous() if upsample: + masks = masks.permute(2, 0, 1).contiguous() masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - return masks.gt_(0.5).permute(1, 2, 0).contiguous() + masks = masks.permute(1, 2, 0).contiguous() + return masks.gt_(0.5) def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): From 20653374f530f23e04be57190232a88416135586 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 13:16:49 +0200 Subject: [PATCH 170/247] Add max_det=300 argument to both val.py and segment/val.py --- segment/val.py | 3 +++ val.py | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 7640a59904d2..16bf9c79d0e7 100644 --- a/segment/val.py +++ b/segment/val.py @@ -128,6 +128,7 @@ def run( imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold + max_det=300, # maximum detections per image task='val', # train, val, test, speed or study device='', # cuda device, i.e. 
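save_one_json is wrapped in @threaded (imported from utils) so that RLE encoding and JSON accumulation run off the main evaluation loop. The decorator is presumably a small fire-and-forget wrapper along these lines (a sketch, not the repo source):

    import threading
    import time

    def threaded(func):
        # Run func in a daemon thread; the caller gets the thread handle back
        def wrapper(*args, **kwargs):
            t = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
            t.start()
            return t
        return wrapper

    @threaded
    def slow_save(i):
        time.sleep(0.1)
        print(f'saved {i}')

    handles = [slow_save(i) for i in range(3)]  # returns immediately
    for t in handles:
        t.join()                                # wait before relying on the results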
0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) @@ -263,6 +264,7 @@ def run( labels=lb, multi_label=True, agnostic=single_cls, + max_det=max_det, masks=nm) # Metrics @@ -404,6 +406,7 @@ def parse_opt(): parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') diff --git a/val.py b/val.py index 7edf70ed5817..358e5ab6f0f2 100644 --- a/val.py +++ b/val.py @@ -101,6 +101,7 @@ def run( imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold + max_det=300, # maximum detections per image task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) @@ -214,7 +215,13 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) + out = non_max_suppression(out, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + max_det=max_det) # Metrics for si, pred in enumerate(out): @@ -336,6 +343,7 @@ def parse_opt(): parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') From f21e3497c97a34696d3b8ad0de9660c0d734bbf2 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 14:48:13 +0200 Subject: [PATCH 171/247] fix onnx_dynamic --- models/yolo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolo.py b/models/yolo.py index b95f5d078fcf..2180792f15a4 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -125,7 +125,7 @@ def forward(self, x): x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference - if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: + if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.dynamic: self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) y = x[i].clone() From cb1649309a7bf48f0f06b28e0427987a69fa3730 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Fri, 26 Aug 2022 21:54:31 +0800 Subject: [PATCH 172/247] speed up pycocotools ops --- segment/val.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/segment/val.py b/segment/val.py index 16bf9c79d0e7..8fbe13ee06a1 100644 --- a/segment/val.py +++ b/segment/val.py @@ -28,6 +28,7 @@ import numpy as np import torch from tqdm import tqdm +from multiprocessing.pool import ThreadPool FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory @@ -39,7 +40,6 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel -from utils import threaded from utils.callbacks import Callbacks from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, @@ -51,6 +51,7 @@ from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, smart_inference_mode +from utils.general import NUM_THREADS def save_one_txt(predn, save_conf, shape, file): @@ -63,17 +64,20 @@ def save_one_txt(predn, save_conf, shape, file): f.write(('%g ' * len(line)).rstrip() % line + '\n') -@threaded def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} from pycocotools.mask import encode + def single_encode(x): + rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] + rle["counts"] = rle["counts"].decode("utf-8") + return rle + image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner pred_masks = np.transpose(pred_masks, (2, 0, 1)) - rles = [encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] for x in pred_masks] - for rle in rles: - rle["counts"] = rle["counts"].decode("utf-8") + with ThreadPool(NUM_THREADS) as pool: + rles = pool.map(single_encode, pred_masks) for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())): jdict.append({ 'image_id': image_id, From 6bec10efe928a81f5089698e4f7b74f442f3a764 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 17:24:37 +0200 Subject: [PATCH 173/247] faster process_mask(upsample=True) for predict --- segment/predict.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 
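single_encode above pushes the C-backed pycocotools encoder through a ThreadPool, one mask plane per task; the commit message credits this with roughly a 2x speedup. The same pattern on dummy masks (requires pycocotools):

    import numpy as np
    from multiprocessing.pool import ThreadPool
    from pycocotools.mask import encode

    def single_encode(x):  # x: (H,W) uint8 binary mask -> COCO RLE dict
        rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
        rle['counts'] = rle['counts'].decode('utf-8')  # bytes -> JSON-serializable str
        return rle

    masks = (np.random.rand(8, 160, 160) > 0.5).astype(np.uint8)  # 8 dummy masks
    with ThreadPool(4) as pool:
        rles = pool.map(single_encode, list(masks))
    print(len(rles), rles[0]['size'])  # 8 [160, 160]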
b29f3d2dfd8a..858d2736554a 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -44,7 +44,7 @@ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import process_mask_upsample, scale_masks +from utils.segment.general import process_mask, scale_masks from utils.segment.plots import plot_masks from utils.torch_utils import select_device, smart_inference_mode @@ -149,7 +149,7 @@ def run( annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Mask additions --------------------------------------------------------------------------------------- - masks = process_mask_upsample(proto[i], det[:, 6:], det[:, :4], im.shape[2:]) # HWC + masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC masks = masks.permute(2, 0, 1).contiguous() # CHW # Mask additions --------------------------------------------------------------------------------------- From 68f805b8bd25e0bd98f035399f8f46c8c9c3572c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Aug 2022 15:30:06 +0000 Subject: [PATCH 174/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/val.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/segment/val.py b/segment/val.py index 8fbe13ee06a1..70053a8313fe 100644 --- a/segment/val.py +++ b/segment/val.py @@ -23,12 +23,12 @@ import json import os import sys +from multiprocessing.pool import ThreadPool from pathlib import Path import numpy as np import torch from tqdm import tqdm -from multiprocessing.pool import ThreadPool FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory @@ -41,7 +41,7 @@ from models.common import DetectMultiBackend from models.yolo import DetectionModel from utils.callbacks import Callbacks -from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_yaml, +from utils.general import (LOGGER, NUM_THREADS, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, box_iou @@ -51,7 +51,6 @@ from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, smart_inference_mode -from utils.general import NUM_THREADS def save_one_txt(predn, save_conf, shape, file): @@ -67,6 +66,7 @@ def save_one_txt(predn, save_conf, shape, file): def save_one_json(predn, jdict, path, class_map, pred_masks): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} from pycocotools.mask import encode + def single_encode(x): rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0] rle["counts"] = rle["counts"].decode("utf-8") From 0d48eb59c3a5aa3034d8cf0d46cd0b0bd2864b2b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 17:40:17 +0200 Subject: [PATCH 175/247] eliminate permutations for process_mask(upsample=True) --- segment/predict.py | 3 --- utils/segment/general.py | 8 ++++++-- 2 files changed, 6 
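process_mask(..., upsample=True) assembles one mask per detection from the 32 prototype maps and that detection's 32 predicted coefficients. The shape flow on dummy tensors (box cropping and the final 0.5 threshold omitted here; they follow the interpolation):

    import torch
    import torch.nn.functional as F

    nm, mh, mw, n = 32, 160, 160, 5          # prototypes are 160x160 for a 640 input
    proto = torch.randn(nm, mh, mw)          # prototype masks, CHW
    coef = torch.randn(n, nm)                # one 32-vector per detection
    masks = (coef.tanh() @ proto.view(nm, -1)).sigmoid().view(-1, mh, mw)  # (5,160,160)
    masks = F.interpolate(masks[None], (640, 640), mode='bilinear', align_corners=False)[0]
    print(masks.shape)                       # torch.Size([5, 640, 640])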
insertions(+), 5 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 858d2736554a..04944c8a5d84 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -148,10 +148,7 @@ def run( imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - # Mask additions --------------------------------------------------------------------------------------- masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC - masks = masks.permute(2, 0, 1).contiguous() # CHW - # Mask additions --------------------------------------------------------------------------------------- # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() diff --git a/utils/segment/general.py b/utils/segment/general.py index 2044d3f018ac..a21d69efceeb 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -34,7 +34,10 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): c, mh, mw = proto_out.shape # CHW masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) # HWC + masks = masks.permute(2, 0, 1).contiguous() + return masks.gt_(0.5) @@ -58,12 +61,13 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 2] *= mw / iw downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih + masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes) # HWC + masks = masks.permute(2, 0, 1).contiguous() if upsample: - masks = masks.permute(2, 0, 1).contiguous() masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - masks = masks.permute(1, 2, 0).contiguous() + return masks.gt_(0.5) From 60105076b6d2cfe74ef87b7c7f4e4ba6f5d0de15 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 18:10:16 +0200 Subject: [PATCH 176/247] eliminate permute-contiguous in crop(), use native dimension order --- segment/val.py | 3 +-- utils/segment/general.py | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/segment/val.py b/segment/val.py index 70053a8313fe..f9ca1fcca4ad 100644 --- a/segment/val.py +++ b/segment/val.py @@ -292,8 +292,7 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], - shape=im[si].shape[1:]).permute(2, 0, 1).contiguous().float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).float() # Predictions if single_cls: diff --git a/utils/segment/general.py b/utils/segment/general.py index a21d69efceeb..9e45d58fa0b0 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -3,7 +3,7 @@ import torch.nn.functional as F -def crop(masks, boxes): +def crop(masks, boxes, hwc=True): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. Vectorized by Chong (thanks Chong). 
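The downsampled_bboxes lines rescale detection boxes from network-input resolution to prototype resolution, so the low-resolution masks can be cropped before any upsampling. In isolation, with example numbers:

    import torch

    ih, iw, mh, mw = 640, 640, 160, 160               # input and prototype sizes
    bboxes = torch.tensor([[64., 128., 320., 480.]])  # xyxy at input resolution
    down = bboxes.clone()
    down[:, [0, 2]] *= mw / iw                        # scale x1, x2
    down[:, [1, 3]] *= mh / ih                        # scale y1, y2
    print(down)  # tensor([[ 16.,  32.,  80., 120.]])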
@@ -11,13 +11,21 @@ def crop(masks, boxes): Args: - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form + - nwc: are masks in height-width-channel HWC order """ - h, w, n = masks.shape - x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) - c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) - return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)).float() + if hwc: # hwc used for loss + h, w, n = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) + else: # chw format used for inference + n, h, w = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) + + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) def process_mask_upsample(proto_out, out_masks, bboxes, shape): @@ -34,10 +42,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): c, mh, mw = proto_out.shape # CHW masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - - masks = crop(masks.permute(1, 2, 0).contiguous(), bboxes) # HWC - masks = masks.permute(2, 0, 1).contiguous() - + masks = crop(masks, bboxes, hwc=False) # CHW return masks.gt_(0.5) @@ -62,12 +67,9 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop(masks.permute(1, 2, 0).contiguous(), downsampled_bboxes) # HWC - masks = masks.permute(2, 0, 1).contiguous() - + masks = crop(masks, downsampled_bboxes, hwc=False) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - return masks.gt_(0.5) From d43b10118fd1100c120e7d904a8edd06503ea725 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 18:21:01 +0200 Subject: [PATCH 177/247] cleanup comment --- utils/segment/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 9e45d58fa0b0..49e8b493ed15 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -11,7 +11,7 @@ def crop(masks, boxes, hwc=True): Args: - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form - - nwc: are masks in height-width-channel HWC order + - hwc: True if masks in height-width-channel HWC order, pass False for CHW """ if hwc: # hwc used for loss From 61e282c55e7078d51ef486ef033be852a0cee573 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 19:05:01 +0200 Subject: [PATCH 178/247] Add Proto() module --- models/common.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/models/common.py b/models/common.py index 5d49da77a35e..79755e10ab61 100644 --- a/models/common.py +++ b/models/common.py @@ -761,8 +761,22 @@ def __str__(self): return '' +class Proto(nn.Module): + # YOLOv5 
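crop() is a single broadcast: each box's bounds are compared against row and column index ramps, producing an inside-box indicator per mask. A toy CHW run (note that r ramps along width and c along height, whatever the inline shape comments say):

    import torch

    n, h, w = 2, 4, 6
    masks = torch.ones(n, h, w)
    boxes = torch.tensor([[1., 1., 4., 3.],
                          [0., 2., 6., 4.]])                 # xyxy per mask
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)    # each (n,1,1)
    r = torch.arange(w, dtype=x1.dtype)[None, None, :]       # (1,1,w) column index
    c = torch.arange(h, dtype=x1.dtype)[None, :, None]       # (1,h,1) row index
    cropped = masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
    print(cropped[0])  # ones only in rows 1-2, cols 1-3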
mask proto module + def __init__(self, c1, c_, c2): # ch_in, number of protos, number of masks + super().__init__() + self.cv1 = Conv(c1, c_, k=3, p=1) + self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) + # self.upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = Conv(c_, c_, k=3, p=1) + self.cv3 = Conv(c_, c2, k=1, p=0) + + def forward(self, x): + return self.cv3(self.cv2(self.upsample(self.cv1(x)))) + + class Classify(nn.Module): - # Classification head, i.e. x(b,c1,20,20) to x(b,c2) + # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2) def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups super().__init__() c_ = 1280 # efficientnet_b0 size From 8823206d0dd544e5f13aa6cbaf87fcee49205292 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 22:12:42 +0200 Subject: [PATCH 179/247] fix class count --- models/segment/yolov5s-seg.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/segment/yolov5s-seg.yaml b/models/segment/yolov5s-seg.yaml index cb71f5853de6..8f26e6800e6b 100644 --- a/models/segment/yolov5s-seg.yaml +++ b/models/segment/yolov5s-seg.yaml @@ -1,7 +1,7 @@ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license -# Parameters 1767976 -nc: 3 # number of classes +# Parameters +nc: 80 # number of classes depth_multiple: 0.33 # model depth multiple width_multiple: 0.5 # layer channel multiple anchors: From ff59beb1a64a893fd1a284be492c5f9ab51ebe1f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 26 Aug 2022 22:16:22 +0200 Subject: [PATCH 180/247] fix anchor order --- models/yolo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolo.py b/models/yolo.py index e440d79f94f5..2d78a31fd088 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -231,8 +231,8 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, i s = 256 # 2x min stride m.inplace = self.inplace m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]]) # forward - m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) + m.anchors /= m.stride.view(-1, 1, 1) self.stride = m.stride self._initialize_biases() # only run once elif isinstance(m, Detect): From 1b10d12735e7be8b9e182abbecb433c2cf5d00e0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 01:14:42 +0200 Subject: [PATCH 181/247] broadcast mask_gti in loss for speed --- utils/segment/loss.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 719424478621..cffbc59a513b 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -123,16 +123,11 @@ def __call__(self, preds, targets, masks): # predictions, targets, model for bi in b.unique(): j = b == bi # matching index if self.overlap: - mask_index = tidxs[i][j] - mask_gti = masks[bi][:, :, None].repeat(1, 1, j.sum()) # shape(h,w,n) - mask_gti = torch.where(mask_gti == mask_index, 1.0, 0.0) # shape(h,w,n) + mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: - mask_gti = masks[tidxs[i]][j] - mask_gti = mask_gti.permute(1, 2, 0).contiguous() - + mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() batch_lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) - - # # update tobj + # Update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] From 
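Proto turns the P3 feature map into prototype masks at twice its resolution. A runnable shape check; the Conv below is a simplified stand-in for YOLOv5's Conv (Conv2d + BatchNorm + SiLU), which is not reproduced here:

    import torch
    import torch.nn as nn

    class Conv(nn.Module):  # stand-in for models.common.Conv, for a self-contained demo
        def __init__(self, c1, c2, k=1, p=0):
            super().__init__()
            self.conv = nn.Conv2d(c1, c2, k, 1, p, bias=False)
            self.bn = nn.BatchNorm2d(c2)
            self.act = nn.SiLU()

        def forward(self, x):
            return self.act(self.bn(self.conv(x)))

    class Proto(nn.Module):  # as in the patch above
        def __init__(self, c1, c_, c2):  # ch_in, number of protos, number of masks
            super().__init__()
            self.cv1 = Conv(c1, c_, k=3, p=1)
            self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
            self.cv2 = Conv(c_, c_, k=3, p=1)
            self.cv3 = Conv(c_, c2, k=1, p=0)

        def forward(self, x):
            return self.cv3(self.cv2(self.upsample(self.cv1(x))))

    p = Proto(128, 256, 32)(torch.randn(1, 128, 80, 80))
    print(p.shape)  # torch.Size([1, 32, 160, 160]): 32 prototypes at 2x P3 resolution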
af8663cb83fa4f93bda9fa6108cded32ae79cc2d Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 01:37:17 +0200 Subject: [PATCH 182/247] Cleanup seg loss --- utils/segment/loss.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index cffbc59a513b..a57555735a9d 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -67,7 +67,7 @@ def __init__(self, model, autobalance=False, overlap=False): def __call__(self, preds, targets, masks): # predictions, targets, model p, proto = preds - bs, nm, mask_h, mask_w = proto.shape # proto shape(bs, mask_h, mask_w, num_masks) + bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width proto = proto.permute(0, 2, 3, 1) lcls = torch.zeros(1, device=self.device) @@ -119,20 +119,17 @@ def __call__(self, preds, targets, masks): # predictions, targets, model torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) mxyxys = xywh2xyxy(mxywhs) - batch_lseg = torch.zeros(1, device=self.device) for bi in b.unique(): j = b == bi # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() - batch_lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) # Update tobj # iou = iou.detach().clamp(0).type(tobj.dtype) # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] - lseg += batch_lseg / len(b.unique()) - obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss if self.autobalance: @@ -143,8 +140,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lbox *= self.hyp["box"] lobj *= self.hyp["obj"] lcls *= self.hyp["cls"] - lseg *= self.hyp["box"] - bs = tobj.shape[0] # batch size + lseg *= self.hyp["box"] / bs loss = lbox + lobj + lcls + lseg return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() From 97e15a024bd7aacecec320223672073e330ebe1c Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 01:55:43 +0200 Subject: [PATCH 183/247] faster indexing --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index a57555735a9d..9255177130fb 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -120,7 +120,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mxyxys = xywh2xyxy(mxywhs) for bi in b.unique(): - j = b == bi # matching index + j = torch.nonzero(b == bi) # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: From 0b83f5d87b49fe92bfc05fe2ccd734de083c1671 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 02:05:35 +0200 Subject: [PATCH 184/247] faster indexing fix --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 9255177130fb..e5671a7e6b15 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -120,7 +120,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mxyxys = xywh2xyxy(mxywhs) for bi in b.unique(): - j = torch.nonzero(b == bi) # matching index + j = torch.nonzero(b == bi).squeeze() # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == 
tidxs[i][j], 1.0, 0.0) # shape(h,w,n) else: From 46c38bb49e54beb7681174e7eb0ea2755f1f2a92 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 02:31:29 +0200 Subject: [PATCH 185/247] faster indexing fix2 --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index e5671a7e6b15..b19bd1e7ef8e 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -122,7 +122,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model for bi in b.unique(): j = torch.nonzero(b == bi).squeeze() # matching index if self.overlap: - mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) # shape(h,w,n) + mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j][None, None], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) From f273ecb05c53320b74094ec994f97d251a01b767 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 02:39:08 +0200 Subject: [PATCH 186/247] revert faster indexing --- utils/segment/loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index b19bd1e7ef8e..45033dc87a91 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -120,9 +120,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mxyxys = xywh2xyxy(mxywhs) for bi in b.unique(): - j = torch.nonzero(b == bi).squeeze() # matching index + j = b == bi # matching index if self.overlap: - mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j][None, None], 1.0, 0.0) + mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) From fc814b79198ba1876e1c04c77c834891c30e4f7d Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 27 Aug 2022 22:22:24 +0530 Subject: [PATCH 187/247] fix validation plotting --- segment/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/train.py b/segment/train.py index ebeebf0c1eaa..98998adda69a 100644 --- a/segment/train.py +++ b/segment/train.py @@ -378,7 +378,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, - plots=False, + plots=plots, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, From 422b8d2eeab972ffff266b635e084a2e29b00c6a Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 22:28:20 +0200 Subject: [PATCH 188/247] Loss cleanup and mxyxy simplification --- utils/segment/loss.py | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 45033dc87a91..e33ab2391590 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -74,7 +74,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model lbox = torch.zeros(1, device=self.device) lobj = torch.zeros(1, device=self.device) lseg = torch.zeros(1, device=self.device) - tcls, tbox, indices, anchors, tidxs, xywh = self.build_targets(p, targets) # targets + tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -85,7 +85,7 @@ def 
__call__(self, preds, targets, masks): # predictions, targets, model if n: pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, nm, self.nc), 1) # subset of predictions - # Regression + # Box regression pxy = pxy.sigmoid() * 2 - 0.5 pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box @@ -107,28 +107,18 @@ def __call__(self, preds, targets, masks): # predictions, targets, model t[range(n), tcls[i]] = self.cp lcls += self.BCEcls(pcls, t) # BCE - # Mask Regression - if tuple(masks.shape[-2:]) != (mask_h, mask_w): - # downsample shape(bs * num_objs,img_h,img_w) -> (bs * num_objs,mask_h,mask_w) + # Mask regression + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] - - mxywh = xywh[i] - mws, mhs = mxywh[:, 2:].T - mws, mhs = mws / pi.shape[3], mhs / pi.shape[2] - mxywhs = (mxywh / torch.tensor(pi.shape, device=mxywh.device)[[3, 2, 3, 2]] * - torch.tensor([mask_w, mask_h, mask_w, mask_h], device=mxywh.device)) - mxyxys = xywh2xyxy(mxywhs) - + mwn, mhn = xywhn[i][:, 2:].T # mask width, height normalized + mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) for bi in b.unique(): j = b == bi # matching index if self.overlap: mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() - lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxys[j], mws[j], mhs[j]) - # Update tobj - # iou = iou.detach().clamp(0).type(tobj.dtype) - # tobj[b[index], a[index], gj[index], gi[index]] += 0.5 * iou[0] + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], mwn[j], mwn[j]) obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss @@ -146,9 +136,8 @@ def __call__(self, preds, targets, masks): # predictions, targets, model return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): - """mask loss of one single pic.""" - # (80, 80, 32) @ (32, n) -> (80, 80, n) - pred_mask = proto @ pred.tanh().T + # Mask loss for one image + pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") @@ -159,7 +148,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets - tcls, tbox, indices, anch, tidxs, xywh = [], [], [], [], [], [] + tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] gain = torch.ones(8, device=self.device) # normalized to gridspace gain ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) if self.overlap: @@ -222,6 +211,6 @@ def build_targets(self, p, targets): anch.append(anchors[a]) # anchors tcls.append(c) # class tidxs.append(tidx) - xywh.append(torch.cat((gxy, gwh), 1)) + xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized - return tcls, tbox, indices, anch, tidxs, xywh + return tcls, tbox, indices, anch, tidxs, xywhn From b5016683e6bbb650ef6078befc53720deb4d2da9 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 27 Aug 2022 22:51:33 +0200 
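Since build_targets now returns grid-normalized xywhn, mask-space boxes come from a single scale vector and the normalized box area used to normalise the mask loss is a simple product. One dummy target, with xywh2xyxy as in utils.general:

    import torch

    def xywh2xyxy(x):  # (cx,cy,w,h) -> (x1,y1,x2,y2), as in utils.general
        y = x.clone()
        y[:, 0] = x[:, 0] - x[:, 2] / 2
        y[:, 1] = x[:, 1] - x[:, 3] / 2
        y[:, 2] = x[:, 0] + x[:, 2] / 2
        y[:, 3] = x[:, 1] + x[:, 3] / 2
        return y

    mask_h = mask_w = 160
    xywhn = torch.tensor([[0.5, 0.5, 0.25, 0.5]])   # centred box, normalized coords
    mxyxy = xywh2xyxy(xywhn * torch.tensor([mask_w, mask_h, mask_w, mask_h], dtype=torch.float))
    marea = xywhn[:, 2:].prod(1)                    # normalized area, divides the loss
    print(mxyxy, marea)  # tensor([[ 60.,  40., 100., 120.]]) tensor([0.1250])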
Subject: [PATCH 189/247] Loss cleanup and mxyxy simplification 2 --- utils/segment/loss.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index e33ab2391590..2a451c62e2ed 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -110,7 +110,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask regression if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] - mwn, mhn = xywhn[i][:, 2:].T # mask width, height normalized + marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) for bi in b.unique(): j = b == bi # matching index @@ -118,7 +118,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) else: mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() - lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], mwn[j], mwn[j]) + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss @@ -135,16 +135,33 @@ def __call__(self, preds, targets, masks): # predictions, targets, model loss = lbox + lobj + lcls + lseg return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() - def single_mask_loss(self, gt_mask, pred, proto, xyxy, w, h): + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) + pred_mask = proto @ pred.T # shape(80,80,32) @ (32,n) -> (80,80,n) # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") lseg = crop(lseg, xyxy) - lseg = lseg.mean(dim=(0, 1)) / w / h + lseg = lseg.mean(dim=(0, 1)) / area return lseg.mean() # , iou# + lseg_iou.mean() + def single_mask_loss_v2(self, gt_mask, pred, proto, xyxy, area, fast=False): + pred_mask = proto @ pred.T + + # Crop + h, w, n = pred_mask.shape + x1, y1, x2, y2 = torch.chunk(xyxy.T[None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=pred_mask.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) + c = torch.arange(h, device=pred_mask.device, dtype=x1.dtype)[:, None, None] + i = (r >= x1) * (r < x2) * (c >= y1) * (c < y2) + + if fast: + return F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i]) + + loss = F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i], reduction="none") + mask_area = i * area + return (loss / mask_area[i]).mean() * area.mean() + def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets From 8d8f79723170efb3b1120a1175914303a6408e47 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 28 Aug 2022 02:25:39 +0530 Subject: [PATCH 190/247] revert validation plotting --- segment/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/train.py b/segment/train.py index 98998adda69a..ebeebf0c1eaa 100644 --- a/segment/train.py +++ b/segment/train.py @@ -378,7 +378,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio 
single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, - plots=plots, + plots=False, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, From 2361108ac02de8597dfcb9d595d016ef96a6dab0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 01:02:40 +0200 Subject: [PATCH 191/247] replace missing tanh --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 2a451c62e2ed..5e0368e2909e 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -137,7 +137,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = proto @ pred.T # shape(80,80,32) @ (32,n) -> (80,80,n) + pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") From 5bd95410f2d896342259f13e8af77332b1e6f1a7 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 13:00:42 +0200 Subject: [PATCH 192/247] Eliminate last permutation --- utils/segment/loss.py | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 5e0368e2909e..0a54f30f7275 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -68,8 +68,6 @@ def __init__(self, model, autobalance=False, overlap=False): def __call__(self, preds, targets, masks): # predictions, targets, model p, proto = preds bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width - proto = proto.permute(0, 2, 3, 1) - lcls = torch.zeros(1, device=self.device) lbox = torch.zeros(1, device=self.device) lobj = torch.zeros(1, device=self.device) @@ -115,9 +113,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model for bi in b.unique(): j = b == bi # matching index if self.overlap: - mask_gti = torch.where(masks[bi].unsqueeze(2) == tidxs[i][j], 1.0, 0.0) + mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0) else: - mask_gti = masks[tidxs[i]][j].permute(1, 2, 0).contiguous() + mask_gti = masks[tidxs[i]][j] lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) obji = self.BCEobj(pi[..., 4], tobj) @@ -137,30 +135,9 @@ def __call__(self, preds, targets, masks): # predictions, targets, model def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = proto @ pred.tanh().T # shape(80,80,32) @ (32,n) -> (80,80,n) - # lseg_iou = self.mask_loss(pred_mask, gt_mask, xyxy) - # iou = self.mask_loss(pred_mask, gt_mask, xyxy, return_iou=True) - lseg = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") - lseg = crop(lseg, xyxy) - lseg = lseg.mean(dim=(0, 1)) / area - return lseg.mean() # , iou# + lseg_iou.mean() - - def single_mask_loss_v2(self, gt_mask, pred, proto, xyxy, area, fast=False): - pred_mask = proto @ pred.T - - # Crop - h, w, n = pred_mask.shape - x1, y1, x2, y2 = torch.chunk(xyxy.T[None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=pred_mask.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) - c = torch.arange(h, device=pred_mask.device, dtype=x1.dtype)[:, None, None] - i = (r >= x1) * (r < x2) * (c >= y1) * (c < y2) - - if fast: - 
return F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i]) - - loss = F.binary_cross_entropy_with_logits(pred_mask[i], gt_mask[i], reduction="none") - mask_area = i * area - return (loss / mask_area[i]).mean() * area.mean() + pred_mask = (pred.tanh() @ proto.view(32, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") + return (crop(loss, xyxy, hwc=False).mean(dim=(1, 2)) / area).mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From 1aff5cc461e534abae58fa9664cdf9eb59d99fef Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 13:16:48 +0200 Subject: [PATCH 193/247] delete unneeded .float() --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index f9ca1fcca4ad..b62a85cca19c 100644 --- a/segment/val.py +++ b/segment/val.py @@ -292,7 +292,7 @@ def run( midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]).float() + pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]) # Predictions if single_cls: From 8fe3f91af5d923d1ff2d7045c855b1d87e9808d5 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 13:21:46 +0200 Subject: [PATCH 194/247] Remove MaskIOULoss and crop(if HWC) --- utils/segment/general.py | 21 +++++++-------------- utils/segment/loss.py | 28 ++-------------------------- 2 files changed, 9 insertions(+), 40 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 49e8b493ed15..ba65eec68a9c 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -3,7 +3,7 @@ import torch.nn.functional as F -def crop(masks, boxes, hwc=True): +def crop(masks, boxes): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. Vectorized by Chong (thanks Chong). 
@@ -11,19 +11,12 @@ def crop(masks, boxes, hwc=True): Args: - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form - - hwc: True if masks in height-width-channel HWC order, pass False for CHW """ - if hwc: # hwc used for loss - h, w, n = masks.shape - x1, y1, x2, y2 = torch.chunk(boxes.T[None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, :, None] # rows shape(1,w,1) - c = torch.arange(h, device=masks.device, dtype=x1.dtype)[:, None, None] # cols shape(h,1,1) - else: # chw format used for inference - n, h, w = masks.shape - x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) - r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) - c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) + n, h, w = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) @@ -42,7 +35,7 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): c, mh, mw = proto_out.shape # CHW masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - masks = crop(masks, bboxes, hwc=False) # CHW + masks = crop(masks, bboxes) # CHW return masks.gt_(0.5) @@ -67,7 +60,7 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop(masks, downsampled_bboxes, hwc=False) # CHW + masks = crop(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 0a54f30f7275..f29fad7f5ff8 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -6,30 +6,7 @@ from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import de_parallel -from .general import crop, masks_iou - - -class MaskIOULoss(nn.Module): - - def __init__(self) -> None: - super().__init__() - - def forward(self, pred_mask, gt_mask, mxyxy=None, return_iou=False): - """ - Args: - pred_mask (torch.Tensor): prediction of masks, (80/160, 80/160, n) - gt_mask (torch.Tensor): ground truth of masks, (80/160, 80/160, n) - mxyxy (torch.Tensor): ground truth of boxes, (n, 4) - """ - _, _, n = pred_mask.shape # same as gt_mask - pred_mask = pred_mask.sigmoid() - if mxyxy is not None: - pred_mask = crop(pred_mask, mxyxy) - gt_mask = crop(gt_mask, mxyxy) - pred_mask = pred_mask.permute(2, 0, 1).view(n, -1) - gt_mask = gt_mask.permute(2, 0, 1).view(n, -1) - iou = masks_iou(pred_mask, gt_mask) - return iou if return_iou else (1.0 - iou) +from .general import crop class ComputeLoss: @@ -57,7 +34,6 @@ def __init__(self, model, autobalance=False, overlap=False): self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance - self.mask_loss = MaskIOULoss() self.na = m.na # number of anchors self.nc 
= m.nc # number of classes self.nl = m.nl # number of layers @@ -137,7 +113,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image pred_mask = (pred.tanh() @ proto.view(32, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") - return (crop(loss, xyxy, hwc=False).mean(dim=(1, 2)) / area).mean() + return (crop(loss, xyxy).mean(dim=(1, 2)) / area).mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From f99934c98adf8387989d12e70d4b2cd2c229a0bb Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 28 Aug 2022 18:48:30 +0200 Subject: [PATCH 195/247] Final v6.3 SegmentationModel architecture updates --- .github/workflows/ci-testing.yml | 9 ++- models/common.py | 15 ++--- models/segment/yolov5l-seg.yaml | 2 +- models/segment/yolov5m-seg.yaml | 2 +- models/segment/yolov5n-seg.yaml | 2 +- models/segment/yolov5s-seg.yaml | 2 +- models/segment/yolov5x-seg.yaml | 2 +- models/yolo.py | 108 +++++++++---------------------- segment/predict.py | 2 +- segment/train.py | 8 +-- segment/val.py | 8 +-- utils/general.py | 18 +++--- utils/segment/general.py | 12 ++-- utils/segment/loss.py | 8 +-- 14 files changed, 78 insertions(+), 120 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 044ece544648..65bba5bc366b 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,14 +128,17 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint - python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train for d in cpu; do # devices - for w in $m $b; do # weights + # for w in $m $b; do # weights + for w in $b; do # weights python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val python segment/predict.py --imgsz 64 --weights $w.pt --device $d # predict done done - python export.py --weights $m.pt --img 64 --include torchscript # export + # python export.py --weights $m.pt --img 64 --include torchscript # export + python export.py --weights $b.pt --img 64 --include torchscript # export - name: Test classification shell: bash # for Windows compatibility run: | diff --git a/models/common.py b/models/common.py index 79755e10ab61..014cf2e98d58 100644 --- a/models/common.py +++ b/models/common.py @@ -333,7 +333,7 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() - segmentation_model = type(model.model[-1]).__name__ == 'DetectSegment' + segmentation_model = type(model.model[-1]).__name__ == 'Segment' elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata @@ -762,14 +762,13 @@ def __str__(self): class Proto(nn.Module): - # YOLOv5 mask proto module - def __init__(self, c1, c_, c2): # ch_in, number of protos, number of masks + # YOLOv5 mask Proto module for segmentation models + def __init__(self, c1, c_=256, 
c2=32): # ch_in, number of protos, number of masks super().__init__() - self.cv1 = Conv(c1, c_, k=3, p=1) - self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) - # self.upsample = nn.Upsample(scale_factor=2, mode='nearest') - self.cv2 = Conv(c_, c_, k=3, p=1) - self.cv3 = Conv(c_, c2, k=1, p=0) + self.cv1 = Conv(c1, c_, k=3) + self.upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = Conv(c_, c_, k=3) + self.cv3 = Conv(c_, c2) def forward(self, x): return self.cv3(self.cv2(self.upsample(self.cv1(x)))) diff --git a/models/segment/yolov5l-seg.yaml b/models/segment/yolov5l-seg.yaml index 98fbe51addfe..4782de11dd2d 100644 --- a/models/segment/yolov5l-seg.yaml +++ b/models/segment/yolov5l-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] diff --git a/models/segment/yolov5m-seg.yaml b/models/segment/yolov5m-seg.yaml index 37a0bb3f6050..f73d1992ac19 100644 --- a/models/segment/yolov5m-seg.yaml +++ b/models/segment/yolov5m-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] \ No newline at end of file diff --git a/models/segment/yolov5n-seg.yaml b/models/segment/yolov5n-seg.yaml index 40a0409aac46..c28225ab4a50 100644 --- a/models/segment/yolov5n-seg.yaml +++ b/models/segment/yolov5n-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] diff --git a/models/segment/yolov5s-seg.yaml b/models/segment/yolov5s-seg.yaml index 8f26e6800e6b..7cbdb36b425c 100644 --- a/models/segment/yolov5s-seg.yaml +++ b/models/segment/yolov5s-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] \ No newline at end of file diff --git a/models/segment/yolov5x-seg.yaml b/models/segment/yolov5x-seg.yaml index e1f91c584dca..5d0c4524a99c 100644 --- a/models/segment/yolov5x-seg.yaml +++ b/models/segment/yolov5x-seg.yaml @@ -44,5 +44,5 @@ head: [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, C3, [1024, False]], # 23 (P5/32-large) - [[17, 20, 23], 1, DetectSegment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) + [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) ] diff --git a/models/yolo.py b/models/yolo.py index 2d78a31fd088..0d0c925d9654 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -36,6 +36,7 @@ class Detect(nn.Module): + # YOLOv5 Detect head for detection models stride = None # strides computed during build dynamic = False # force grid reconstruction export = False # export mode @@ -63,15 +64,16 @@ def forward(self, x): if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]: self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) - y = x[i].sigmoid() + y = x[i].clone() + y[..., :5 + self.nc].sigmoid_() if self.inplace: y[..., 0:2] = (y[..., 
0:2] * 2 + self.grid[i]) * self.stride[i] # xy y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 - xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy, wh, etc = y.split((2, 2, self.no - 4), 4) # tensor_split((2, 4, 5), 4) if torch 1.8.0 xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh - y = torch.cat((xy, wh, conf), 4) + y = torch.cat((xy, wh, etc), 4) z.append(y.view(bs, -1, self.no)) return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x) @@ -87,62 +89,21 @@ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version return grid, anchor_grid -class DetectSegment(Detect): - - def __init__(self, nc=80, anchors=(), mask_dim=32, proto_channel=256, ch=(), inplace=True): +class Segment(Detect): + # YOLOv5 Segment head for segmentation models + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True): super().__init__(nc, anchors, ch, inplace) - self.mask_dim = mask_dim - self.no = nc + 5 + self.mask_dim # number of outputs per anchor - self.nm = 5 + self.mask_dim - self.proto_c = proto_channel + self.nm = nm # number of masks + self.npr = npr # number of protos + self.no = 5 + nc + self.nm # number of outputs per anchor self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv - - # p3作为输入 - self.proto_net = nn.Sequential( - nn.Conv2d(ch[0], self.proto_c, kernel_size=3, stride=1, padding=1), - nn.SiLU(inplace=True), - # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), - # nn.SiLU(inplace=True), - # nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), - # nn.SiLU(inplace=True), - nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), - nn.Conv2d(self.proto_c, self.proto_c, kernel_size=3, stride=1, padding=1), - nn.SiLU(inplace=True), - nn.Conv2d(self.proto_c, self.mask_dim, kernel_size=1, padding=0), - nn.SiLU(inplace=True)) + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.detect = Detect.forward def forward(self, x): - z = [] # inference output - for i in range(self.nl): - if i == 0: - proto_out = self.proto_net(x[i]) - - x[i] = self.m[i](x[i]) # conv - bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) - x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() - - if not self.training: # inference - if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.dynamic: - self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) - - y = x[i].clone() - y[..., 0:5] = y[..., 0:5].sigmoid() - y[..., self.nm:] = y[..., self.nm:].sigmoid() - if self.inplace: - y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy - y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh - else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 - xy = (y[..., 0:2] * 2. 
+ self.grid[i]) * self.stride[i] # xy - wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh - y = torch.cat((xy.type_as(y), wh.type_as(y), y[..., 4:]), -1) - z.append(y.view(-1, self.na * ny * nx, self.no)) - - # TODO: export - if torch.onnx.is_in_onnx_export(): - output = torch.cat(z, 1) - return output # keep the same type with x - else: - return (x, proto_out) if self.training else (torch.cat(z, 1), (x, proto_out)) + p = self.proto(x[0]) + x = self.detect(self, x) + return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) class BaseModel(nn.Module): @@ -193,7 +154,7 @@ def _apply(self, fn): # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers self = super()._apply(fn) m = self.model[-1] # Detect() - if isinstance(m, Detect): + if isinstance(m, (Detect, Segment)): m.stride = fn(m.stride) m.grid = list(map(fn, m.grid)) if isinstance(m.anchor_grid, list): @@ -227,22 +188,15 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, i # Build strides, anchors m = self.model[-1] # Detect() - if isinstance(m, DetectSegment): + if isinstance(m, (Detect, Segment)): s = 256 # 2x min stride m.inplace = self.inplace - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0]]) # forward + forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x) + m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward check_anchor_order(m) m.anchors /= m.stride.view(-1, 1, 1) self.stride = m.stride self._initialize_biases() # only run once - elif isinstance(m, Detect): - s = 256 # 2x min stride - m.inplace = self.inplace - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.empty(1, ch, s, s))]) # forward - check_anchor_order(m) # must be in pixel-space (not grid-space) - m.anchors /= m.stride.view(-1, 1, 1) - self.stride = m.stride - self._initialize_biases() # only run once # Init weights, biases initialize_weights(self) @@ -303,17 +257,19 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - if hasattr(m, "mask_dim"): - b.data[:, 5 + m.mask_dim:] += math.log(0.6 / - (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls - else: - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility +class SegmentationModel(DetectionModel): + # YOLOv5 segmentation model + def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None): + super().__init__(cfg, ch, nc, anchors) + + class ClassificationModel(BaseModel): # YOLOv5 classification model def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index @@ -354,14 +310,14 @@ def parse_model(d, ch): # model_dict, input_channels(3) args[j] = eval(a) if isinstance(a, str) else a # eval strings n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, - BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, 
nn.ConvTranspose2d, DWConvTranspose2d, C3x): + if m in {Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: c1, c2 = ch[f], args[0] if c2 != no: # if not output c2 = make_divisible(c2 * gw, 8) args = [c1, c2, *args[1:]] - if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]: + if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}: args.insert(2, n) # number of repeats n = 1 elif m is nn.BatchNorm2d: @@ -369,11 +325,11 @@ def parse_model(d, ch): # model_dict, input_channels(3) elif m is Concat: c2 = sum(ch[x] for x in f) # TODO: channel, gw, gd - elif m in [Detect, DetectSegment]: + elif m in {Detect, Segment}: args.append([ch[x] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) - if m is DetectSegment: + if m is Segment: args[3] = make_divisible(args[3] * gw, 8) elif m is Contract: c2 = ch[f] * args[0] ** 2 diff --git a/segment/predict.py b/segment/predict.py index 04944c8a5d84..c5b755ad1d62 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -126,7 +126,7 @@ def run( # NMS with dt[2]: - pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, masks=32) + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32) # Second-stage classifier (optional) # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) diff --git a/segment/train.py b/segment/train.py index ebeebf0c1eaa..36e8f153f677 100644 --- a/segment/train.py +++ b/segment/train.py @@ -43,7 +43,7 @@ import segment.val as validate # for end-of-epoch mAP from models.experimental import attempt_load -from models.yolo import Model +from models.yolo import SegmentationModel from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks @@ -109,7 +109,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio plots = not evolve and not opt.noplots # create plots overlap = not opt.no_overlap cuda = device.type != 'cpu' - init_seeds(opt.seed + 1 + RANK, deterministic=False) + init_seeds(opt.seed + 1 + RANK, deterministic=True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] @@ -124,14 +124,14 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio with torch_distributed_zero_first(LOCAL_RANK): weights = attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak - model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + model = SegmentationModel(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(csd, strict=False) # load LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report else: - model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create + model = SegmentationModel(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create amp = 
check_amp(model) # check AMP # Freeze diff --git a/segment/val.py b/segment/val.py index b62a85cca19c..c08f0bf5cce6 100644 --- a/segment/val.py +++ b/segment/val.py @@ -39,7 +39,7 @@ import torch.nn.functional as F from models.common import DetectMultiBackend -from models.yolo import DetectionModel +from models.yolo import SegmentationModel from utils.callbacks import Callbacks from utils.general import (LOGGER, NUM_THREADS, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, @@ -169,7 +169,7 @@ def run( device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() - nm = de_parallel(model).model[-1].mask_dim # number of masks + nm = de_parallel(model).model[-1].nm # number of masks else: # called directly device = select_device(device, batch_size=batch_size) @@ -182,7 +182,7 @@ def run( stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA - nm = de_parallel(model).model.model[-1].mask_dim if isinstance(model, DetectionModel) else 32 # number of masks + nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32 # number of masks if engine: batch_size = model.batch_size else: @@ -269,7 +269,7 @@ def run( multi_label=True, agnostic=single_cls, max_det=max_det, - masks=nm) + nm=nm) # Metrics plot_masks = [] # masks for plotting diff --git a/utils/general.py b/utils/general.py index 98b4aa348c33..d336ba91ba5b 100644 --- a/utils/general.py +++ b/utils/general.py @@ -811,7 +811,7 @@ def non_max_suppression( multi_label=False, labels=(), max_det=300, - masks=0, + nm=0, # number of masks ): """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections @@ -820,7 +820,7 @@ def non_max_suppression( """ bs = prediction.shape[0] # batch size - nc = prediction.shape[2] - 5 # number of classes + nc = prediction.shape[2] - nm - 5 # number of classes xc = prediction[..., 4] > conf_thres # candidates # Checks @@ -837,8 +837,8 @@ def non_max_suppression( merge = False # use merge-NMS t = time.time() - si = 5 + masks # box/mask start index - output = [torch.zeros((0, 6 + masks), device=prediction.device)] * bs + mi = 5 + nc # mask start index + output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -847,7 +847,7 @@ def non_max_suppression( # Cat apriori labels if autolabelling if labels and len(labels[xi]): lb = labels[xi] - v = torch.zeros((len(lb), nc + 5), device=x.device) + v = torch.zeros((len(lb), nc + nm + 5), device=x.device) v[:, :4] = lb[:, 1:5] # box v[:, 4] = 1.0 # conf v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls @@ -862,14 +862,14 @@ def non_max_suppression( # Box/Mask box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) - mask = x[:, 5:si] # zero columns if no masks + mask = x[:, mi:] # zero columns if no masks # Detections matrix nx6 (xyxy, conf, cls) if multi_label: - i, j = (x[:, si:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + si, None], j[:, None].float(), mask[i]), 1) + i, j = (x[:, 
5:mi] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1) else: # best class only - conf, j = x[:, si:].max(1, keepdim=True) + conf, j = x[:, 5:mi].max(1, keepdim=True) x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] # Filter by class diff --git a/utils/segment/general.py b/utils/segment/general.py index ba65eec68a9c..2c62e99b1389 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -21,7 +21,7 @@ def crop(masks, boxes): return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) -def process_mask_upsample(proto_out, out_masks, bboxes, shape): +def process_mask_upsample(protos, masks_in, bboxes, shape): """ Crop after upsample. proto_out: [mask_dim, mask_h, mask_w] @@ -32,14 +32,14 @@ def process_mask_upsample(proto_out, out_masks, bboxes, shape): return: h, w, n """ - c, mh, mw = proto_out.shape # CHW - masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) + c, mh, mw = protos.shape # CHW + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW masks = crop(masks, bboxes) # CHW return masks.gt_(0.5) -def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): +def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ Crop before upsample. proto_out: [mask_dim, mask_h, mask_w] @@ -50,9 +50,9 @@ def process_mask(proto_out, out_masks, bboxes, shape, upsample=False): return: h, w, n """ - c, mh, mw = proto_out.shape # CHW + c, mh, mw = protos.shape # CHW ih, iw = shape - masks = (out_masks.tanh() @ proto_out.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW downsampled_bboxes = bboxes.clone() downsampled_bboxes[:, 0] *= mw / iw diff --git a/utils/segment/loss.py b/utils/segment/loss.py index f29fad7f5ff8..fa1043488fd8 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -57,7 +57,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model n = b.shape[0] # number of targets if n: - pxy, pwh, _, pmask, pcls, = pi[b, a, gj, gi].split((2, 2, 1, nm, self.nc), 1) # subset of predictions + pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions # Box regression pxy = pxy.sigmoid() * 2 - 0.5 @@ -111,7 +111,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image - pred_mask = (pred.tanh() @ proto.view(32, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") return (crop(loss, xyxy).mean(dim=(1, 2)) / area).mean() @@ -126,10 +126,10 @@ def build_targets(self, p, targets): ti = [] for i in range(batch): num = (targets[:, 0] == i).sum() # find number of targets of each image - ti.append(torch.arange(num, device=targets.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) + ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) ti = torch.cat(ti, 1) # (na, nt) else: - ti = torch.arange(nt, device=targets.device).float().view(1, nt).repeat(na, 1) + ti = torch.arange(nt, device=self.device).float().view(1, 
nt).repeat(na, 1) targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices g = 0.5 # bias From 00a23e0c1cd2798be633bc9023477e3235145790 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 28 Aug 2022 16:48:58 +0000 Subject: [PATCH 196/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/yolo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index 0d0c925d9654..2d32226a6ba6 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -310,8 +310,9 @@ def parse_model(d, ch): # model_dict, input_channels(3) args[j] = eval(a) if isinstance(a, str) else a # eval strings n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in {Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, - BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: + if m in { + Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: c1, c2 = ch[f], args[0] if c2 != no: # if not output c2 = make_divisible(c2 * gw, 8) From 2a26bdb002f22a58deff02a6da77f51538116be6 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 13:16:27 +0530 Subject: [PATCH 197/247] Add support for TF export --- models/tf.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/models/tf.py b/models/tf.py index ecb0d4d79c78..ed4f11324160 100644 --- a/models/tf.py +++ b/models/tf.py @@ -30,7 +30,7 @@ from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, DWConvTranspose2d, Focus, autopad) from models.experimental import MixConv2d, attempt_load -from models.yolo import Detect +from models.yolo import Detect, Segment from utils.activations import SiLU from utils.general import LOGGER, make_divisible, print_args @@ -319,6 +319,29 @@ def _make_grid(nx=20, ny=20): xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) +class TFSegment(TFDetect): + # YOLOv5 Segment head for segmentation models + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): + super().__init__(nc, anchors, ch, imgsz, w) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.no = 5 + nc + self.nm # number of outputs per anchor + self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv + self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos + self.detect = TFDetect.call + + def forward(self, x): + p = self.proto(x[0]) + x = self.detect(self, x) + return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) + +class TFProto(keras.layers.Layer): + def __init__(self, c1, c_=256, c2=32, w=None): + super().__init__() + self.cv1 = TFConv(c1, c_, k=3, w=w.cv1) + self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') + self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) + self.cv3 = TFConv(c_, c2, w=w.cv3) class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() @@ -377,7 +400,9 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) args = [ch[f]] elif m is Concat: c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) - elif 
m is Detect: + elif m in [Detect, Segment]: + import pdb; + pdb.set_trace() args.append([ch[x + 1] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) From 0e0f9c0a4482fbd9286d7c60a29f49ead1057077 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 13:25:43 +0530 Subject: [PATCH 198/247] remove debugger trace --- models/tf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/models/tf.py b/models/tf.py index ed4f11324160..4d9456e565c3 100644 --- a/models/tf.py +++ b/models/tf.py @@ -401,8 +401,6 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) elif m is Concat: c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) elif m in [Detect, Segment]: - import pdb; - pdb.set_trace() args.append([ch[x + 1] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) From b6bca18fec29945e65de993b8b0ba6faa85dfd8e Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 15:22:11 +0530 Subject: [PATCH 199/247] add call --- models/tf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/tf.py b/models/tf.py index 4d9456e565c3..d38461065421 100644 --- a/models/tf.py +++ b/models/tf.py @@ -342,6 +342,9 @@ def __init__(self, c1, c_=256, c2=32, w=None): self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) self.cv3 = TFConv(c_, c2, w=w.cv3) + + def call(self, inputs): + return self.cv2(self.cv2(self.upsample(self.cv1(inputs)))) class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() From c7a2ec9aeb3994c60114ad87072c995b8fbba4dc Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 15:24:37 +0530 Subject: [PATCH 200/247] update --- models/tf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index d38461065421..41cd3c9e8635 100644 --- a/models/tf.py +++ b/models/tf.py @@ -344,7 +344,7 @@ def __init__(self, c1, c_=256, c2=32, w=None): self.cv3 = TFConv(c_, c2, w=w.cv3) def call(self, inputs): - return self.cv2(self.cv2(self.upsample(self.cv1(inputs)))) + return self.cv3(self.cv2(self.upsample(self.cv1(inputs)))) class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() From d2af8e1337b77a870c04c7542c56c0aea3dabefa Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Tue, 30 Aug 2022 16:45:07 +0530 Subject: [PATCH 201/247] update --- models/tf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index 41cd3c9e8635..747ac55a14cb 100644 --- a/models/tf.py +++ b/models/tf.py @@ -330,7 +330,7 @@ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos self.detect = TFDetect.call - def forward(self, x): + def call(self, x): p = self.proto(x[0]) x = self.detect(self, x) return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) From 445680c2b54b055733560c3c1140e4ef3a21d700 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 1 Sep 2022 22:44:56 +0200 Subject: [PATCH 202/247] Merge master --- utils/dataloaders.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 3f011911ebf7..ff46b43270ad 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -214,7 +214,7 @@ def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None): self.auto = auto 
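
A note on the Segment and TFSegment heads introduced above: both reuse the parent detect head by storing its forward as an unbound function and passing self explicitly. A toy sketch of the pattern; the classes and numbers below are made up for illustration and are not the library API:

class Detect:
    def forward(self, x):
        return [xi * 2 for xi in x]

class Segment(Detect):
    def __init__(self):
        self.detect = Detect.forward  # plain function attribute, not a bound method

    def forward(self, x):
        p = sum(x)                # stand-in for the Proto() branch
        x = self.detect(self, x)  # run the parent head on self
        return x, p

print(Segment().forward([1, 2, 3]))  # ([2, 4, 6], 6)

This keeps the proto branch a thin wrapper rather than copying the grid and anchor logic into the subclass.
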
self.transforms = transforms # optional if any(videos): - self.new_video(videos[0]) # new video + self._new_video(videos[0]) # new video else: self.cap = None assert self.nf > 0, f'No images or videos found in {p}. ' \ @@ -239,10 +239,11 @@ def __next__(self): if self.count == self.nf: # last video raise StopIteration path = self.files[self.count] - self.new_video(path) + self._new_video(path) ret_val, im0 = self.cap.read() self.frame += 1 + # im0 = self._cv2_rotate(im0) # for use if cv2 auto rotation is False s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' else: @@ -261,10 +262,23 @@ def __next__(self): return path, im, im0, self.cap, s - def new_video(self, path): + def _new_video(self, path): + # Create a new video capture object self.frame = 0 self.cap = cv2.VideoCapture(path) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees + # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 + + def _cv2_rotate(self, im): + # Rotate a cv2 video manually + if self.orientation == 0: + return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) + elif self.orientation == 180: + return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif self.orientation == 90: + return cv2.rotate(im, cv2.ROTATE_180) + return im def __len__(self): return self.nf # number of files From 342229409fa1d24c146f356fd06d919e5b7e1db0 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 1 Sep 2022 22:46:04 +0200 Subject: [PATCH 203/247] Merge master --- utils/dataloaders.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index ff46b43270ad..f0a50d7c8dca 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -40,6 +40,7 @@ VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes BAR_FORMAT = '{l_bar}{bar:10}{r_bar}{bar:-10b}' # tqdm bar format LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders # Get orientation exif tag for orientation in ExifTags.TAGS.keys(): @@ -83,7 +84,7 @@ def exif_transpose(image): 5: Image.TRANSPOSE, 6: Image.ROTATE_270, 7: Image.TRANSVERSE, - 8: Image.ROTATE_90,}.get(orientation) + 8: Image.ROTATE_90}.get(orientation) if method is not None: image = image.transpose(method) del exif[0x0112] @@ -139,17 +140,16 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader( - dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator, - ), dataset + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + # generator=generator + ), dataset class InfiniteDataLoader(dataloader.DataLoader): @@ -528,7 +528,6 @@ def __init__(self, self.im_files = [self.im_files[i] for i in 
irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] - self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] @@ -1169,6 +1168,6 @@ def create_classification_dataloader(path, shuffle=shuffle and sampler is None, num_workers=nw, sampler=sampler, - pin_memory=True, + pin_memory=PIN_MEMORY, worker_init_fn=seed_worker, generator=generator) # or DataLoader(persistent_workers=True) From 5d7ed132488d357eee3ee4f5eb5f87fc38ff59ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Sep 2022 20:46:29 +0000 Subject: [PATCH 204/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/dataloaders.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index f0a50d7c8dca..2e499f182f4e 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -140,16 +140,17 @@ def create_dataloader(path, loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates # generator = torch.Generator() # generator.manual_seed(0) - return loader(dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=PIN_MEMORY, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator - ), dataset + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + # generator=generator + ), dataset class InfiniteDataLoader(dataloader.DataLoader): From 2738352f8b54e969778d3af08c921016c9b51777 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 1 Sep 2022 22:48:16 +0200 Subject: [PATCH 205/247] Update dataloaders.py --- utils/dataloaders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 2e499f182f4e..bc5a66b71a76 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -529,6 +529,7 @@ def __init__(self, self.im_files = [self.im_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] From 70e35e557fb7f2d7c87f6f46c95ea8adfb0413bb Mon Sep 17 00:00:00 2001 From: glennjocher Date: Thu, 1 Sep 2022 22:50:07 +0200 Subject: [PATCH 206/247] Restore CI --- .github/workflows/ci-testing.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 65bba5bc366b..d271f6a3786d 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -128,17 +128,15 @@ jobs: run: | m=${{ matrix.model }}-seg # official weights b=runs/train-seg/exp/weights/best # best.pt checkpoint - # python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train + python segment/train.py --imgsz 64 --batch 32 --weights $m.pt --cfg $m.yaml --epochs 1 --device cpu # train python segment/train.py --imgsz 64 --batch 32 --weights '' --cfg $m.yaml --epochs 1 --device cpu # train for d in cpu; do # devices - # 
for w in $m $b; do # weights - for w in $b; do # weights + for w in $m $b; do # weights python segment/val.py --imgsz 64 --batch 32 --weights $w.pt --device $d # val python segment/predict.py --imgsz 64 --weights $w.pt --device $d # predict + python export.py --weights $w.pt --img 64 --include torchscript --device $d # export done done - # python export.py --weights $m.pt --img 64 --include torchscript # export - python export.py --weights $b.pt --img 64 --include torchscript # export - name: Test classification shell: bash # for Windows compatibility run: | From e74c49f5807f7915b4c5a643efdeac9d5fe93014 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 1 Sep 2022 22:57:16 +0200 Subject: [PATCH 207/247] Update dataloaders.py --- utils/dataloaders.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index bc5a66b71a76..837fea1926c9 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -138,19 +138,17 @@ def create_dataloader(path, nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates - # generator = torch.Generator() - # generator.manual_seed(0) - return loader( - dataset, - batch_size=batch_size, - shuffle=shuffle and sampler is None, - num_workers=nw, - sampler=sampler, - pin_memory=PIN_MEMORY, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, - worker_init_fn=seed_worker, - # generator=generator - ), dataset + generator = torch.Generator() + generator.manual_seed(0) + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + generator=generator), dataset class InfiniteDataLoader(dataloader.DataLoader): From 52f2123e9876f7c29fa907a068f3fa51c67ae0cd Mon Sep 17 00:00:00 2001 From: Jiacong Fang Date: Fri, 2 Sep 2022 20:50:05 +0800 Subject: [PATCH 208/247] Fix TF/TFLite export for segmentation model --- models/tf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index 747ac55a14cb..425ee33abb8f 100644 --- a/models/tf.py +++ b/models/tf.py @@ -333,7 +333,7 @@ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w def call(self, x): p = self.proto(x[0]) x = self.detect(self, x) - return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) + return (x, p) if self.training else ((x[0], p),) class TFProto(keras.layers.Layer): def __init__(self, c1, c_=256, c2=32, w=None): @@ -407,6 +407,8 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) args.append([ch[x + 1] for x in f]) if isinstance(args[1], int): # number of anchors args[1] = [list(range(args[1] * 2))] * len(f) + if m is Segment: + args[3] = make_divisible(args[3] * gw, 8) args.append(imgsz) else: c2 = ch[f] From 74c3b252ae115ec6d87fbf98279558fd7888ce71 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 15:36:50 +0200 Subject: [PATCH 209/247] Merge master --- segment/predict.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index c5b755ad1d62..7441d6af5777 100644 --- 
a/segment/predict.py +++ b/segment/predict.py @@ -31,7 +31,6 @@ from pathlib import Path import torch -import torch.backends.cudnn as cudnn FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory @@ -99,7 +98,6 @@ def run( # Dataloader if webcam: view_img = check_imshow() - cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size else: From 5fdd16afe319f72ce6c98f1d828ad58894387d34 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 15:58:10 +0200 Subject: [PATCH 210/247] Cleanup predict.py mask plotting --- segment/predict.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 7441d6af5777..314c93f077ca 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -156,11 +156,9 @@ def run( n = (det[:, 5] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string - # Mask plotting ---------------------------------------------------------------------------------------- - mcolors = [colors(int(cls), True) for cls in det[:, 5]] - im_masks = plot_masks(im[i], masks, mcolors) # image with masks shape(imh,imw,3) + # Mask plotting + im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) annotator.im = scale_masks(im.shape[2:], im_masks, im0.shape) # scale to original h, w - # Mask plotting ---------------------------------------------------------------------------------------- # Write results for *xyxy, conf, cls in reversed(det[:, :6]): From 4a3a5bdf0af9aa1b6fcf59d047ef62dc9571ab94 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 19:01:47 +0200 Subject: [PATCH 211/247] cleanup scale_masks() --- utils/segment/general.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 2c62e99b1389..facf2286dccc 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -66,39 +66,31 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): return masks.gt_(0.5) -def scale_masks(img1_shape, masks, img0_shape, ratio_pad=None): +def scale_masks(im1_shape, masks, im0_shape, ratio_pad=None): """ img1_shape: model input shape, [h, w] img0_shape: origin pic shape, [h, w, 3] masks: [h, w, num] - resize for the most time """ - # Rescale coords (xyxy) from img1_shape to img0_shape - if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + # Rescale coordinates (xyxy) from im1_shape to im0_shape + if ratio_pad is None: # calculate from im0_shape + gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new + pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding else: - gain = ratio_pad[0][0] pad = ratio_pad[1] - tl_pad = int(pad[1]), int(pad[0]) # y, x - br_pad = int(img1_shape[0] - pad[1]), int(img1_shape[1] - pad[0]) + top, left = int(pad[1]), int(pad[0]) # y, x + bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) if len(masks.shape) < 2: raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') - # masks_h, masks_w, n - masks = masks[tl_pad[0]:br_pad[0], 
tl_pad[1]:br_pad[1]] - # 1, n, masks_h, masks_w - # masks = masks.permute(2, 0, 1).contiguous()[None, :] - # # shape = [1, n, masks_h, masks_w] after F.interpolate, so take first element - # masks = F.interpolate(masks, img0_shape[:2], mode='bilinear', align_corners=False)[0] + masks = masks[top:bottom, left:right] + # masks = masks.permute(2, 0, 1).contiguous() + # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] # masks = masks.permute(1, 2, 0).contiguous() - # masks_h, masks_w, n - masks = cv2.resize(masks, (img0_shape[1], img0_shape[0])) + masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) - # keepdim if len(masks.shape) == 2: masks = masks[:, :, None] - return masks From b7cd6ea0aee7d5b41d50abfaba32a08f0a9c1d24 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 19:03:24 +0200 Subject: [PATCH 212/247] rename scale_masks to scale_image --- segment/predict.py | 4 ++-- segment/val.py | 4 ++-- utils/segment/general.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 314c93f077ca..fa68d4af6574 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -43,7 +43,7 @@ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import process_mask, scale_masks +from utils.segment.general import process_mask, scale_image from utils.segment.plots import plot_masks from utils.torch_utils import select_device, smart_inference_mode @@ -158,7 +158,7 @@ def run( # Mask plotting im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) - annotator.im = scale_masks(im.shape[2:], im_masks, im0.shape) # scale to original h, w + annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w # Write results for *xyxy, conf, cls in reversed(det[:, :6]): diff --git a/segment/val.py b/segment/val.py index c08f0bf5cce6..1ab33f7a2194 100644 --- a/segment/val.py +++ b/segment/val.py @@ -47,7 +47,7 @@ from utils.metrics import ConfusionMatrix, box_iou from utils.plots import output_to_target, plot_val_study from utils.segment.dataloaders import create_dataloader -from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_masks +from utils.segment.general import mask_iou, process_mask, process_mask_upsample, scale_image from utils.segment.metrics import Metrics, ap_per_class_box_and_mask from utils.segment.plots import plot_images_and_masks from utils.torch_utils import de_parallel, select_device, smart_inference_mode @@ -319,7 +319,7 @@ def run( if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: - pred_masks = scale_masks(im[si].shape[1:], + pred_masks = scale_image(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary # callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) diff --git a/utils/segment/general.py b/utils/segment/general.py index facf2286dccc..9e68e45e8dcc 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -66,7 +66,7 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): return masks.gt_(0.5) -def 
scale_masks(im1_shape, masks, im0_shape, ratio_pad=None): +def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): """ img1_shape: model input shape, [h, w] img0_shape: origin pic shape, [h, w, 3] From 92cd027772aff2d2a8b257130c432dd108dc20a6 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 20:22:21 +0200 Subject: [PATCH 213/247] cleanup/optimize plot_masks --- utils/segment/plots.py | 47 ++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index eac46d9853aa..d882dd07d56b 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -13,42 +13,31 @@ from ..plots import Annotator, colors -def plot_masks(img, masks, colors, alpha=0.5): +def plot_masks(im, masks, colors, alpha=0.5): """ Args: - img (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + im (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] masks (tensor): predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] Return: ndarray: img after draw masks, shape: [h, w, 3] - transform colors and send img_gpu to cpu for the most time. """ - img_gpu = img.clone() - num_masks = len(masks) - if num_masks == 0: - return img.permute(1, 2, 0).contiguous().cpu().numpy() * 255 - - # [n, 1, 1, 3] - # faster this way to transform colors - colors = torch.tensor(colors, device=img.device).float() / 255.0 - colors = colors[:, None, None, :] - # [n, h, w, 1] - masks = masks[:, :, :, None] - masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha - inv_alph_masks = masks * (-alpha) + 1 - masks_color_summand = masks_color[0] - if num_masks > 1: - inv_alph_cumul = inv_alph_masks[:(num_masks - 1)].cumprod(dim=0) - masks_color_cumul = masks_color[1:] * inv_alph_cumul - masks_color_summand += masks_color_cumul.sum(dim=0) - - # print(inv_alph_masks.prod(dim=0).shape) # [h, w, 1] - img_gpu = img_gpu.flip(dims=[0]) # filp channel for opencv - img_gpu = img_gpu.permute(1, 2, 0).contiguous() - # [h, w, 3] - img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand - return (img_gpu * 255).byte().cpu().numpy() + if len(masks) == 0: + return im.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + + colors = torch.tensor(colors, device=im.device).float() / 255.0 + colors = colors[:, None, None] # shape(n,1,1,3) + masks = masks.unsqueeze(3) # shape(n,h,w,1) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + + inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) + + im = im.flip(dims=[0]) # flip channel + im = im.permute(1, 2, 0).contiguous() # shape(h,w,3) + im = im * inv_alph_masks[-1] + mcs + return (im * 255).byte().cpu().numpy() @threaded @@ -158,7 +147,7 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): data = pd.read_csv(f) index = np.argmax( 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11],) + 0.1 * data.values[:, 11]) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From c9156c4232910586727e7d903a5b6297efdb3e1e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Sep 2022 18:22:58 +0000 Subject: [PATCH 214/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- utils/segment/plots.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index d882dd07d56b..d3fddf26e22a 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -145,9 +145,8 @@ def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): for f in files: try: data = pd.read_csv(f) - index = np.argmax( - 0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + - 0.1 * data.values[:, 11]) + index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11]) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): From d0f40c306061d77069ad6766e9544f0b69519e34 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 21:03:52 +0200 Subject: [PATCH 215/247] Add Annotator.masks() --- utils/plots.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/utils/plots.py b/utils/plots.py index dd1c072a8846..b09be5d4afc9 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -113,6 +113,16 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) + def masks(self, masks, colors, alpha=0.5): + # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) + if len(masks): + masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] # shape(n,h,w,1) + colors = np.array(colors, dtype=np.float32)[:, None, None] / 255.0 # shape(n,1,1,3) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + inv_alph_masks = (1 - masks * alpha).cumprod(0) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) + self.im[:] = self.im * inv_alph_masks[-1] + mcs + def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) self.draw.rectangle(xy, fill, outline, width) From b1056543c2b14349bf69399219165ed862d9e3bb Mon Sep 17 00:00:00 2001 From: glennjocher Date: Fri, 2 Sep 2022 21:23:21 +0200 Subject: [PATCH 216/247] Annotator.masks() fix --- utils/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index b09be5d4afc9..500c203b3593 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -116,8 +116,8 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 def masks(self, masks, colors, alpha=0.5): # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) 
if len(masks): - masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] # shape(n,h,w,1) - colors = np.array(colors, dtype=np.float32)[:, None, None] / 255.0 # shape(n,1,1,3) + masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] / 255.0 # shape(n,h,w,1) + colors = np.array(colors, dtype=np.float32)[:, None, None] # shape(n,1,1,3) masks_color = masks * (colors * alpha) # shape(n,h,w,3) inv_alph_masks = (1 - masks * alpha).cumprod(0) mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) From 1dc663ffcfaa13a13a4801142b2e874799c2d8e0 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 2 Sep 2022 22:32:08 +0200 Subject: [PATCH 217/247] Update plots.py --- utils/plots.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index 500c203b3593..0d79cf4ae3ef 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -114,14 +114,17 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 lineType=cv2.LINE_AA) def masks(self, masks, colors, alpha=0.5): - # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) - if len(masks): - masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] / 255.0 # shape(n,h,w,1) - colors = np.array(colors, dtype=np.float32)[:, None, None] # shape(n,1,1,3) - masks_color = masks * (colors * alpha) # shape(n,h,w,3) - inv_alph_masks = (1 - masks * alpha).cumprod(0) - mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) - self.im[:] = self.im * inv_alph_masks[-1] + mcs + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) + n = masks.shape[2] # number of masks + if n: + im = self.im.astype(np.float32) + masks = np.array([alpha], dtype=np.float32) * masks[..., None] / 255.0 # shape(n,h,w,1) + colors = np.array(colors, dtype=np.uint8).reshape((n, 1, 1, 3)) # shape(n,1,1,3) + for i in range(n): + m = masks[:, :, i] + im *= 1.0 - m + im += colors[i] * m + self.im = im.astype(np.uint8) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) From 558ee483d11fd4dfffa4ebb0d1c738d619c0ff19 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:06:15 +0200 Subject: [PATCH 218/247] Annotator mask optimization --- utils/plots.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/utils/plots.py b/utils/plots.py index 500c203b3593..5948827f4f41 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -113,15 +113,14 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, alpha=0.5): - # Add multiple masks of shape(n,h,w) with colors list([r,g,b], [r,g,b], ...) + def masks(self, masks, colors, alpha=0.5, eps=1e-7): + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) 
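The rewrite continuing below drops per-mask tensors entirely and does the whole overlay with one matrix product: with masks stored channel-last, (h,w,n) @ (n,3) gives each pixel the summed colors of every mask covering it, and dividing by the per-pixel count s averages them where masks overlap. A small NumPy sketch with made-up shapes (note that 1 - s * alpha can dip below zero under heavy overlap, which the retina-masks revision later in this series guards by clipping s into [0, 1]):

import numpy as np

h, w = 4, 4
masks = np.zeros((h, w, 2), np.float32)
masks[:, :2, 0] = 1.0                        # mask 0 covers the left half
masks[:2, :, 1] = 1.0                        # mask 1 covers the top half
colors = np.array([[255, 0, 0], [0, 0, 255]], np.float32)

s = masks.sum(2, keepdims=True)              # per-pixel mask count, (h,w,1)
blend = masks @ colors / (s + 1e-7)          # (h,w,2) @ (2,3) = averaged color, (h,w,3)
im = np.full((h, w, 3), 128, np.float32)
out = blend * 0.5 + im * (1 - s * 0.5)       # alpha = 0.5; where s = 2 the image term vanishes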
if len(masks): - masks = np.ascontiguousarray(masks).astype(np.float32)[..., None] / 255.0 # shape(n,h,w,1) - colors = np.array(colors, dtype=np.float32)[:, None, None] # shape(n,1,1,3) - masks_color = masks * (colors * alpha) # shape(n,h,w,3) - inv_alph_masks = (1 - masks * alpha).cumprod(0) - mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(h,w,3) - self.im[:] = self.im * inv_alph_masks[-1] + mcs + masks = masks.astype(np.float32) / 255.0 # shape(h,w,n) + colors = np.array(colors, dtype=np.uint8) # shape(n,3) + s = masks.sum(2, keepdims=True) + masks = masks @ colors / (s + eps) # (h,w,n) @ (n,3) = (h,w,3) + self.im[:] = masks * alpha + self.im * (1 - s * alpha) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) From 2378091a4775413d29485131a4690a59e6b3acc3 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:10:42 +0200 Subject: [PATCH 219/247] Rename crop() to crop_mask() --- utils/segment/general.py | 6 +++--- utils/segment/loss.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/utils/segment/general.py b/utils/segment/general.py index 9e68e45e8dcc..36547ed0889c 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -3,7 +3,7 @@ import torch.nn.functional as F -def crop(masks, boxes): +def crop_mask(masks, boxes): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. Vectorized by Chong (thanks Chong). @@ -35,7 +35,7 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - masks = crop(masks, bboxes) # CHW + masks = crop_mask(masks, bboxes) # CHW return masks.gt_(0.5) @@ -60,7 +60,7 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop(masks, downsampled_bboxes) # CHW + masks = crop_mask(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index fa1043488fd8..955faf3a36b4 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -6,7 +6,7 @@ from ..loss import FocalLoss, smooth_BCE from ..metrics import bbox_iou from ..torch_utils import de_parallel -from .general import crop +from .general import crop_mask class ComputeLoss: @@ -113,7 +113,7 @@ def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") - return (crop(loss, xyxy).mean(dim=(1, 2)) / area).mean() + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) From 27c5563d66d5d9024fdd37df01628b6c8222a25f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:12:09 +0200 Subject: [PATCH 220/247] Do not crop in predict.py --- segment/predict.py | 2 +- utils/segment/general.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index fa68d4af6574..37a780f6cde2 100644 --- a/segment/predict.py +++ b/segment/predict.py 
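crop_mask's body is not quoted in this series, but its call sites (CHW masks and xyxy boxes on the same grid) pin down what it does: zero out every mask pixel falling outside its detection's box. A reconstruction of the vectorized form credited to Chong, offered as a sketch rather than the verbatim source:

import torch

def crop_mask(masks, boxes):
    # masks (n,h,w); boxes (n,4) as xyxy pixel coords on the mask grid
    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)                    # each (n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # cols (1,1,w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # rows (1,h,1)
    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))             # broadcast to (n,h,w)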
@@ -146,7 +146,7 @@ def run( imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC + masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], crop=False, upsample=True) # HWC # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() diff --git a/utils/segment/general.py b/utils/segment/general.py index 36547ed0889c..f37f13847cac 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -39,7 +39,7 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): return masks.gt_(0.5) -def process_mask(protos, masks_in, bboxes, shape, upsample=False): +def process_mask(protos, masks_in, bboxes, shape, crop=True, upsample=False): """ Crop before upsample. proto_out: [mask_dim, mask_h, mask_w] @@ -60,7 +60,8 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - masks = crop_mask(masks, downsampled_bboxes) # CHW + if crop: + masks = crop_mask(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) From d1e49e469b6f0c7c1b6d3153bac1cef79d77178a Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 3 Sep 2022 00:15:46 +0200 Subject: [PATCH 221/247] crop always --- segment/predict.py | 2 +- utils/segment/general.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 37a780f6cde2..fa68d4af6574 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -146,7 +146,7 @@ def run( imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], crop=False, upsample=True) # HWC + masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() diff --git a/utils/segment/general.py b/utils/segment/general.py index f37f13847cac..36547ed0889c 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -39,7 +39,7 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): return masks.gt_(0.5) -def process_mask(protos, masks_in, bboxes, shape, crop=True, upsample=False): +def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ Crop before upsample. 
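Behind both process_mask variants, a detection's mask is simply a sigmoid of a linear combination of the shared prototype maps, YOLACT-style: each box carries 32 coefficients that weight 32 (mh, mw) prototypes, as the masks_in @ protos line quoted above shows. A toy illustration with random tensors and hypothetical sizes:

import torch

protos = torch.randn(32, 160, 160)                  # (mask_dim, mh, mw)
coef = torch.randn(5, 32)                           # 5 detections x 32 mask coefficients
masks = (coef @ protos.view(32, -1)).sigmoid().view(-1, 160, 160)
print(masks.shape)                                  # torch.Size([5, 160, 160])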
proto_out: [mask_dim, mask_h, mask_w] @@ -60,8 +60,7 @@ def process_mask(protos, masks_in, bboxes, shape, crop=True, upsample=False): downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih - if crop: - masks = crop_mask(masks, downsampled_bboxes) # CHW + masks = crop_mask(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) From b1357c7fc34d2f52ecbded9c28aae98d64a44b8a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 4 Sep 2022 11:34:59 +0000 Subject: [PATCH 222/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/tf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/models/tf.py b/models/tf.py index 425ee33abb8f..8cce147059d3 100644 --- a/models/tf.py +++ b/models/tf.py @@ -319,6 +319,7 @@ def _make_grid(nx=20, ny=20): xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) + class TFSegment(TFDetect): # YOLOv5 Segment head for segmentation models def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): @@ -335,17 +336,20 @@ def call(self, x): x = self.detect(self, x) return (x, p) if self.training else ((x[0], p),) + class TFProto(keras.layers.Layer): + def __init__(self, c1, c_=256, c2=32, w=None): super().__init__() self.cv1 = TFConv(c1, c_, k=3, w=w.cv1) self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) self.cv3 = TFConv(c_, c2, w=w.cv3) - + def call(self, inputs): return self.cv3(self.cv2(self.upsample(self.cv1(inputs)))) + class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' From f34346f137f50eb4d44c2c8f0b7c2e2dc64b568f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 16:41:37 +0200 Subject: [PATCH 223/247] Merge master --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 1ab33f7a2194..faa3f03b3659 100644 --- a/segment/val.py +++ b/segment/val.py @@ -252,7 +252,7 @@ def run( # Inference with dt[1]: - out, train_out = model(im) # if training else model(im, augment=augment, val=True) # inference, loss + out, train_out = model(im) if compute_loss else (model(im, augment=augment), None) # Loss if compute_loss: From 82deb52cd7b59867a772ff2bacc9ee842bce67a6 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 17:19:19 +0200 Subject: [PATCH 224/247] Add vid-stride from master PR --- segment/predict.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index fa68d4af6574..8e4ebbd20028 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -76,6 +76,7 @@ def run( hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -98,10 +99,10 @@ def run( # Dataloader if webcam: view_img = check_imshow() - dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) bs 
= len(dataset) # batch_size else: - dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs @@ -245,6 +246,7 @@ def parse_opt(): parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') + parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) From 996a3e460b8521e53cad5276b7046813498311bd Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 17:42:35 +0200 Subject: [PATCH 225/247] Update seg model outputs --- models/yolo.py | 2 +- segment/predict.py | 3 +-- segment/val.py | 25 ++++++++++++------------- val.py | 20 ++++++++++---------- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index 2d32226a6ba6..d59034bd4041 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -103,7 +103,7 @@ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True): def forward(self, x): p = self.proto(x[0]) x = self.detect(self, x) - return (x, p) if self.training else (x[0], p) if self.export else (x[0], (x[1], p)) + return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1]) class BaseModel(nn.Module): diff --git a/segment/predict.py b/segment/predict.py index 8e4ebbd20028..7761f036a714 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -120,8 +120,7 @@ def run( # Inference with dt[1]: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred, out = model(im, augment=augment, visualize=visualize) - proto = out[1] + pred, proto = model(im, augment=augment, visualize=visualize)[:2] # NMS with dt[2]: diff --git a/segment/val.py b/segment/val.py index faa3f03b3659..8576ef490033 100644 --- a/segment/val.py +++ b/segment/val.py @@ -252,7 +252,7 @@ def run( # Inference with dt[1]: - out, train_out = model(im) if compute_loss else (model(im, augment=augment), None) + preds, protos, train_out = model(im) if compute_loss else (*model(im, augment=augment)[:2], None) # Loss if compute_loss: @@ -262,18 +262,18 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - max_det=max_det, - nm=nm) + preds = non_max_suppression(preds, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + max_det=max_det, + nm=nm) # Metrics plot_masks = [] # masks for plotting - for si, pred in enumerate(out): + for si, (pred, proto) in enumerate(zip(preds, protos)): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] @@ -291,8 +291,7 @@ def run( # Masks midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] - proto_out = train_out[1][si] - pred_masks = process(proto_out, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]) + pred_masks = process(proto, pred[:, 
6:], pred[:, :4], shape=im[si].shape[1:]) # Predictions if single_cls: @@ -329,7 +328,7 @@ def run( if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) - plot_images_and_masks(im, output_to_target(out, max_det=15), plot_masks, paths, + plot_images_and_masks(im, output_to_target(preds, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred # callbacks.run('on_val_batch_end') diff --git a/val.py b/val.py index 32776acb261f..9dee4734214c 100644 --- a/val.py +++ b/val.py @@ -205,7 +205,7 @@ def run( # Inference with dt[1]: - out, train_out = model(im) if compute_loss else (model(im, augment=augment), None) + preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None) # Loss if compute_loss: @@ -215,16 +215,16 @@ def run( targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: - out = non_max_suppression(out, - conf_thres, - iou_thres, - labels=lb, - multi_label=True, - agnostic=single_cls, - max_det=max_det) + preds = non_max_suppression(preds, + conf_thres, + iou_thres, + labels=lb, + multi_label=True, + agnostic=single_cls, + max_det=max_det) # Metrics - for si, pred in enumerate(out): + for si, pred in enumerate(preds): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] @@ -264,7 +264,7 @@ def run( # Plot images if plots and batch_i < 3: plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels - plot_images(im, output_to_target(out), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred + plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred callbacks.run('on_val_batch_end') From a014646144bc1656e63bb4e9b9d2da41cdb45636 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 17:52:21 +0200 Subject: [PATCH 226/247] Update seg model outputs --- segment/val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/val.py b/segment/val.py index 8576ef490033..d2250cee9ca7 100644 --- a/segment/val.py +++ b/segment/val.py @@ -256,7 +256,7 @@ def run( # Loss if compute_loss: - loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls + loss += compute_loss((train_out, protos), targets, masks)[1] # box, obj, cls # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels From 04eb59097c68a2b49dc748a91cac0c68044b63c8 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 18:26:50 +0200 Subject: [PATCH 227/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 4 ++-- utils/benchmarks.py | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 540df6088efa..98cf4aeea990 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -18,7 +18,7 @@ jobs: matrix: os: [ ubuntu-latest ] python-version: [ '3.9' ] # requires python<=3.9 - model: [ yolov5n ] + model: [ yolov5n, yolov5n-seg ] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -39,7 +39,7 @@ jobs: pip list - name: Run benchmarks run: | - python utils/benchmarks.py --weights ${{ 
matrix.model }}.pt --img 320 --hard-fail 0.29 + python utils/benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 Tests: timeout-minutes: 60 diff --git a/utils/benchmarks.py b/utils/benchmarks.py index d5f4c1d61fbe..bec0da5ce4b9 100644 --- a/utils/benchmarks.py +++ b/utils/benchmarks.py @@ -40,10 +40,13 @@ # ROOT = ROOT.relative_to(Path.cwd()) # relative import export -import val +from val import run as val_det +from segment.val import run as val_seg from utils import notebook_init from utils.general import LOGGER, check_yaml, file_size, print_args from utils.torch_utils import select_device +from models.experimental import attempt_load +from models.yolo import SegmentationModel def run( @@ -59,6 +62,7 @@ def run( ): y, t = [], time.time() device = select_device(device) + model_type = type(attempt_load(weights, fuse=False)) # DetectionModel, SegmentationModel, etc. for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, CPU, GPU) try: assert i not in (9, 10), 'inference not supported' # Edge TPU and TF.js are unsupported @@ -76,10 +80,14 @@ def run( assert suffix in str(w), 'export failed' # Validate - result = val.run(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half) - metrics = result[0] # metrics (mp, mr, map50, map, *losses(box, obj, cls)) - speeds = result[2] # times (preprocess, inference, postprocess) - y.append([name, round(file_size(w), 1), round(metrics[3], 4), round(speeds[1], 2)]) # MB, mAP, t_inference + if model_type == SegmentationModel: + result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half) + metric = result[0][7] # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls)) + else: # DetectionModel: + result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='benchmark', half=half) + metric = result[0][3] # (p, r, map50, map, *loss(box, obj, cls)) + speed = result[2][1] # times (preprocess, inference, postprocess) + y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)]) # MB, mAP, t_inference except Exception as e: if hard_fail: assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}' From 11d27a7900a599d7ffebf7a29f07ff6954aa64b5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 4 Sep 2022 16:27:19 +0000 Subject: [PATCH 228/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/benchmarks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/benchmarks.py b/utils/benchmarks.py index bec0da5ce4b9..4301a8d9aa41 100644 --- a/utils/benchmarks.py +++ b/utils/benchmarks.py @@ -40,13 +40,13 @@ # ROOT = ROOT.relative_to(Path.cwd()) # relative import export -from val import run as val_det +from models.experimental import attempt_load +from models.yolo import SegmentationModel from segment.val import run as val_seg from utils import notebook_init from utils.general import LOGGER, check_yaml, file_size, print_args from utils.torch_utils import select_device -from models.experimental import attempt_load -from models.yolo import SegmentationModel +from val import run as val_det def run( From 4016d72807d729228c269bbca2b072753aed899b Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:15:21 +0200 Subject: [PATCH 229/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 3 ++- 
utils/benchmarks.py => benchmarks.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) rename utils/benchmarks.py => benchmarks.py (99%) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 98cf4aeea990..7fa3a467a1c9 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -15,6 +15,7 @@ jobs: Benchmarks: runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ ubuntu-latest ] python-version: [ '3.9' ] # requires python<=3.9 @@ -39,7 +40,7 @@ jobs: pip list - name: Run benchmarks run: | - python utils/benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 + python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 Tests: timeout-minutes: 60 diff --git a/utils/benchmarks.py b/benchmarks.py similarity index 99% rename from utils/benchmarks.py rename to benchmarks.py index bec0da5ce4b9..54574eb73f2b 100644 --- a/utils/benchmarks.py +++ b/benchmarks.py @@ -34,7 +34,7 @@ import pandas as pd FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] # YOLOv5 root directory +ROOT = FILE.parents[0] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH # ROOT = ROOT.relative_to(Path.cwd()) # relative From e9ab8512fb04427c2f764040dc5fecf4124775ca Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:28:09 +0200 Subject: [PATCH 230/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 7fa3a467a1c9..45da7cb3769d 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -19,7 +19,7 @@ jobs: matrix: os: [ ubuntu-latest ] python-version: [ '3.9' ] # requires python<=3.9 - model: [ yolov5n, yolov5n-seg ] + model: [ yolov5n ] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -38,9 +38,12 @@ jobs: python --version pip --version pip list - - name: Run benchmarks + - name: Benchmark DetectionModel + run: | + python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.29 + - name: Benchmark SegmentationModel run: | - python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.10 + python benchmarks.py --weights ${{ matrix.model }}-seg.pt --img 320 Tests: timeout-minutes: 60 From 5a1abb2510b4e32f05a2938301085b6973cd94da Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:39:29 +0200 Subject: [PATCH 231/247] Add segmentation benchmarks --- .github/workflows/ci-testing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 45da7cb3769d..63b5f8276176 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -40,10 +40,10 @@ jobs: pip list - name: Benchmark DetectionModel run: | - python benchmarks.py --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.29 + python benchmarks.py --data coco128.yaml --weights ${{ matrix.model }}.pt --img 320 --hard-fail 0.29 - name: Benchmark SegmentationModel run: | - python benchmarks.py --weights ${{ matrix.model }}-seg.pt --img 320 + python benchmarks.py --data coco128-seg.yaml --weights ${{ matrix.model }}-seg.pt --img 320 Tests: timeout-minutes: 60 From 29c03dac5bc1199ce64ac28f8bad56a79b57b00f Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sun, 4 Sep 2022 19:49:50 +0200 Subject: [PATCH 232/247] Fix 
DetectMultiBackend for OpenVINO --- models/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index bbdda5ceb6cc..746762004403 100644 --- a/models/common.py +++ b/models/common.py @@ -373,7 +373,6 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, if batch_dim.is_static: batch_size = batch_dim.get_length() executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2 - output_layer = next(iter(executable_network.outputs)) stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata elif engine: # TensorRT LOGGER.info(f'Loading {w} for TensorRT inference...') @@ -477,7 +476,7 @@ def forward(self, im, augment=False, visualize=False): y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) elif self.xml: # OpenVINO im = im.cpu().numpy() # FP32 - y = self.executable_network([im])[self.output_layer] + y = list(self.executable_network([im]).values()) elif self.engine: # TensorRT if self.dynamic and im.shape != self.bindings['images'].shape: i_in, i_out = (self.model.get_binding_index(x) for x in ('images', 'output')) From 3b63e8b79384351e449665dfd9ad3507308071e4 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 14:49:41 +0800 Subject: [PATCH 233/247] update Annotator.masks --- segment/predict.py | 22 ++++++++++++------ utils/plots.py | 50 +++++++++++++++++++++++++++++++++++------ utils/segment/plots.py | 51 +++++++++++++++++------------------------- 3 files changed, 78 insertions(+), 45 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 7761f036a714..b17ba17daf94 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -77,6 +77,7 @@ def run( half=False, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -144,7 +145,7 @@ def run( s += '%gx%g ' % im.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop - annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + annotator = Annotator(im0, line_width=line_thickness, example=str(names), pil=True) if len(det): masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC @@ -157,8 +158,13 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting - im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) - annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w + import time + tms = time.time() + annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i], retina_masks=retina_masks) + tme = time.time() + print("plot mask:", tme - tms) + # im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) + # annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w # Write results for *xyxy, conf, cls in reversed(det[:, :6]): @@ -183,7 +189,8 @@ def run( cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) - cv2.waitKey(1) # 1 millisecond + if cv2.waitKey(1) == ord('q'): # 1 
millisecond + exit() # Save results (image with detections) if save_img: @@ -205,7 +212,7 @@ def run( vid_writer[i].write(im0) # Print time (inference-only) - LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") + # LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") # Print results t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image @@ -219,8 +226,8 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') - parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--weights', nargs='+', type=str, default='../weights/yolov5n-seg.pt', help='model path(s)') + parser.add_argument('--source', type=str, default='/home/laughing/Downloads/MOT17-03-FRCNN-raw.mp4', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') @@ -246,6 +253,7 @@ def parse_opt(): parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride') + parser.add_argument('--retina-masks', default=True, action='store_true', help='whether to plot masks in native resolution') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) diff --git a/utils/plots.py b/utils/plots.py index 5948827f4f41..0842688922ec 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -22,6 +22,7 @@ from utils import TryExcept, threaded from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path, is_ascii, xywh2xyxy, xyxy2xywh) +from utils.segment.general import scale_image from utils.metrics import fitness # Settings @@ -113,14 +114,49 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, alpha=0.5, eps=1e-7): - # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) - if len(masks): - masks = masks.astype(np.float32) / 255.0 # shape(h,w,n) - colors = np.array(colors, dtype=np.uint8) # shape(n,3) - s = masks.sum(2, keepdims=True) - masks = masks @ colors / (s + eps) # (h,w,n) @ (n,3) = (h,w,3) + def masks(self, masks, colors, img_gpu, retina_masks=False, alpha=0.5): + """Plot masks at once. + Args: + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] + img_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + retina_masks (bool): whether to plot masks in native resolution. + """ + if self.pil: + # convert to numpy first + self.im = np.asarray(self.im).copy() + if retina_masks: + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) 
+ if len(masks) == 0: + return + masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.permute(1, 2, 0).contiguous() + masks = masks.cpu().numpy() + masks = scale_image(img_gpu.shape[1:], masks, self.im.shape) + masks = np.asarray(masks, dtype=np.float32) + colors = np.asarray(colors, dtype=np.float32) # shape(n,3) + s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together + masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) self.im[:] = masks * alpha + self.im * (1 - s * alpha) + else: + if len(masks) == 0: + self.im[:] = img_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + colors = torch.tensor(colors, device=img_gpu.device, dtype=torch.float32) / 255.0 + colors = colors[:, None, None] # shape(n,1,1,3) + masks = masks.unsqueeze(3) # shape(n,h,w,1) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + + inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) + + img_gpu = img_gpu.flip(dims=[0]) # flip channel + img_gpu = img_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) + img_gpu = img_gpu * inv_alph_masks[-1] + mcs + im_mask = (img_gpu * 255).byte().cpu().numpy() + self.im[:] = scale_image(img_gpu.shape, im_mask, self.im.shape) + if self.pil: + # convert im back to PIL and update draw + self.fromarray(self.im) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index d3fddf26e22a..21cbdfe1e6de 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -13,33 +13,6 @@ from ..plots import Annotator, colors -def plot_masks(im, masks, colors, alpha=0.5): - """ - Args: - im (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] - masks (tensor): predicted masks on cuda, shape: [n, h, w] - colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - Return: - ndarray: img after draw masks, shape: [h, w, 3] - - """ - if len(masks) == 0: - return im.permute(1, 2, 0).contiguous().cpu().numpy() * 255 - - colors = torch.tensor(colors, device=im.device).float() / 255.0 - colors = colors[:, None, None] # shape(n,1,1,3) - masks = masks.unsqueeze(3) # shape(n,h,w,1) - masks_color = masks * (colors * alpha) # shape(n,h,w,3) - - inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) - mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) - - im = im.flip(dims=[0]) # flip channel - im = im.permute(1, 2, 0).contiguous() # shape(h,w,3) - im = im * inv_alph_masks[-1] + mcs - return (im * 255).byte().cpu().numpy() - - @threaded def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): # Plot image grid with labels @@ -119,7 +92,9 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' image_masks = masks[idx] im = np.asarray(annotator.im).copy() - for j, box in enumerate(boxes.T.tolist()): + resized_masks = [] + masks_colors = [] + for j in range(len(boxes)): if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) mh, mw = image_masks[j].shape @@ -129,9 +104,23 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' mask = mask.astype(np.bool) else: mask = image_masks[j].astype(np.bool) - with contextlib.suppress(Exception): - im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 - annotator.fromarray(im) + resized_masks.append(mask) + 
masks_colors.append(color) + annotator.masks(resized_masks, colors, images[0], retina_masks=True) + # + # for j, box in enumerate(boxes.T.tolist()): + # if labels or conf[j] > 0.25: # 0.25 conf thresh + # color = colors(classes[j]) + # mh, mw = image_masks[j].shape + # if mh != h or mw != w: + # mask = image_masks[j].astype(np.uint8) + # mask = cv2.resize(mask, (w, h)) + # mask = mask.astype(np.bool) + # else: + # mask = image_masks[j].astype(np.bool) + # with contextlib.suppress(Exception): + # im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + # annotator.fromarray(im) annotator.im.save(fname) # save From 69e59936de0ef4cf461bdc3207d832c33224eefc Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:17:59 +0800 Subject: [PATCH 234/247] fix val plot --- segment/predict.py | 2 +- utils/plots.py | 13 +++++++------ utils/segment/plots.py | 15 +++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index b17ba17daf94..fb5c578d3f86 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -160,7 +160,7 @@ def run( # Mask plotting import time tms = time.time() - annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i], retina_masks=retina_masks) + annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i] if retina_masks else None) tme = time.time() print("plot mask:", tme - tms) # im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) diff --git a/utils/plots.py b/utils/plots.py index 0842688922ec..681db44aede7 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -114,7 +114,7 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, img_gpu, retina_masks=False, alpha=0.5): + def masks(self, masks, colors, img_gpu=None, alpha=0.5): """Plot masks at once. Args: masks (tensor): predicted masks on cuda, shape: [n, h, w] @@ -125,14 +125,15 @@ def masks(self, masks, colors, img_gpu, retina_masks=False, alpha=0.5): if self.pil: # convert to numpy first self.im = np.asarray(self.im).copy() - if retina_masks: + if img_gpu is None: # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) 
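With this fix in place the caller picks the path: pass img_gpu to composite quickly at network resolution on the device, or pass None with masks for a native-resolution overlay. A usage sketch mirroring the predict.py call later in this series (det, proto, im, im0, names and retina_masks as defined there):

masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True)  # (n,h,w)
annotator = Annotator(im0, line_width=3, example=str(names))
annotator.masks(masks,
                colors=[colors(c, True) for c in det[:, 5]],
                img_gpu=None if retina_masks else im[i])  # None selects the native-res branch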
if len(masks) == 0: return - masks = torch.as_tensor(masks, dtype=torch.uint8) - masks = masks.permute(1, 2, 0).contiguous() - masks = masks.cpu().numpy() - masks = scale_image(img_gpu.shape[1:], masks, self.im.shape) + if isinstance(masks, torch.Tensor): + masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.cpu().numpy() + masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) + masks = scale_image(masks.shape[1:], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 21cbdfe1e6de..79d3c812c954 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -94,20 +94,19 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' im = np.asarray(annotator.im).copy() resized_masks = [] masks_colors = [] - for j in range(len(boxes)): + for j in range(len(boxes.T)): if labels or conf[j] > 0.25: # 0.25 conf thresh - color = colors(classes[j]) + color = np.array(colors(classes[j])) mh, mw = image_masks[j].shape + mask = image_masks[j].astype(np.uint8) if mh != h or mw != w: - mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) - mask = mask.astype(np.bool) - else: - mask = image_masks[j].astype(np.bool) resized_masks.append(mask) masks_colors.append(color) - annotator.masks(resized_masks, colors, images[0], retina_masks=True) - # + if len(resized_masks): + resized_masks = np.stack(resized_masks, axis=0) + annotator.masks(resized_masks, masks_colors) + # for j, box in enumerate(boxes.T.tolist()): # if labels or conf[j] > 0.25: # 0.25 conf thresh # color = colors(classes[j]) From 6d0e952ab3afaa0e552f82a3e7f6bc78ddbeefba Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:18:25 +0800 Subject: [PATCH 235/247] revert val plot --- utils/segment/plots.py | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/utils/segment/plots.py b/utils/segment/plots.py index 79d3c812c954..e882c14390f0 100644 --- a/utils/segment/plots.py +++ b/utils/segment/plots.py @@ -92,34 +92,19 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg' image_masks = masks[idx] im = np.asarray(annotator.im).copy() - resized_masks = [] - masks_colors = [] - for j in range(len(boxes.T)): + for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh - color = np.array(colors(classes[j])) + color = colors(classes[j]) mh, mw = image_masks[j].shape - mask = image_masks[j].astype(np.uint8) if mh != h or mw != w: + mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) - resized_masks.append(mask) - masks_colors.append(color) - if len(resized_masks): - resized_masks = np.stack(resized_masks, axis=0) - annotator.masks(resized_masks, masks_colors) - - # for j, box in enumerate(boxes.T.tolist()): - # if labels or conf[j] > 0.25: # 0.25 conf thresh - # color = colors(classes[j]) - # mh, mw = image_masks[j].shape - # if mh != h or mw != w: - # mask = image_masks[j].astype(np.uint8) - # mask = cv2.resize(mask, (w, h)) - # mask = mask.astype(np.bool) - # else: - # mask = image_masks[j].astype(np.bool) - # with contextlib.suppress(Exception): - # im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 - # annotator.fromarray(im) + mask = mask.astype(np.bool) + else: + mask = 
image_masks[j].astype(np.bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) annotator.im.save(fname) # save From 71780b2a61279570d97d7a5d78ed04c9aa529310 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:32:03 +0800 Subject: [PATCH 236/247] clean up --- segment/predict.py | 17 +++++------------ utils/plots.py | 5 +++-- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index fb5c578d3f86..8c07e6747d7f 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -43,8 +43,7 @@ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box -from utils.segment.general import process_mask, scale_image -from utils.segment.plots import plot_masks +from utils.segment.general import process_mask from utils.torch_utils import select_device, smart_inference_mode @@ -158,13 +157,7 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting - import time - tms = time.time() - annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=im[i] if retina_masks else None) - tme = time.time() - print("plot mask:", tme - tms) - # im_masks = plot_masks(im[i], masks, colors=[colors(x, True) for x in det[:, 5]]) # shape(imh,imw,3) - # annotator.im = scale_image(im.shape[2:], im_masks, im0.shape) # scale to original h, w + annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=None if retina_masks else im[i]) # Write results for *xyxy, conf, cls in reversed(det[:, :6]): @@ -226,8 +219,8 @@ def run( def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default='../weights/yolov5n-seg.pt', help='model path(s)') - parser.add_argument('--source', type=str, default='/home/laughing/Downloads/MOT17-03-FRCNN-raw.mp4', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-seg.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') @@ -253,7 +246,7 @@ def parse_opt(): parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride') - parser.add_argument('--retina-masks', default=True, action='store_true', help='whether to plot masks in native resolution') + parser.add_argument('--retina-masks', action='store_true', help='whether to plot masks in native resolution') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) diff --git a/utils/plots.py b/utils/plots.py index 681db44aede7..acbd6c5c3ca2 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -131,9 +131,10 @@ def masks(self, masks, 
colors, img_gpu=None, alpha=0.5): return if isinstance(masks, torch.Tensor): masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.permute(1, 2, 0).contiguous() masks = masks.cpu().numpy() - masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) - masks = scale_image(masks.shape[1:], masks, self.im.shape) + # masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) + masks = scale_image(masks.shape[:2], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together From d53c8256b51780841dfaf20155601d98c5789ae7 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 5 Sep 2022 15:36:49 +0800 Subject: [PATCH 237/247] revert pil --- segment/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/predict.py b/segment/predict.py index 8c07e6747d7f..a55f99b7093c 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -144,7 +144,7 @@ def run( s += '%gx%g ' % im.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop - annotator = Annotator(im0, line_width=line_thickness, example=str(names), pil=True) + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC From 78a42d2684cc2fc168ed01dcfbcec61d6c2bcb7b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Sep 2022 08:19:13 +0000 Subject: [PATCH 238/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/predict.py | 62 ++++++++++++++++++++++++---------------------- utils/plots.py | 6 ++--- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index a55f99b7093c..fe1bf8d80af7 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -49,34 +49,34 @@ @smart_inference_mode() def run( - weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - data=ROOT / 'data/coco128.yaml', # dataset.yaml path - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/predict-seg', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - vid_stride=1, # video frame-rate stride - retina_masks=False, + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict-seg', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -157,7 +157,9 @@ def run( s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Mask plotting - annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], img_gpu=None if retina_masks else im[i]) + annotator.masks(masks, + colors=[colors(x, True) for x in det[:, 5]], + img_gpu=None if retina_masks else im[i]) # Write results for *xyxy, conf, cls in reversed(det[:, :6]): @@ -182,7 +184,7 @@ def run( cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) - if cv2.waitKey(1) == ord('q'): # 1 millisecond + if cv2.waitKey(1) == ord('q'): # 1 millisecond exit() # Save results (image with detections) diff --git a/utils/plots.py b/utils/plots.py index acbd6c5c3ca2..c1298e9cc53a 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -22,8 +22,8 @@ from utils import TryExcept, threaded from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_coords, increment_path, is_ascii, xywh2xyxy, xyxy2xywh) -from utils.segment.general import scale_image from utils.metrics import fitness +from utils.segment.general import 
scale_image # Settings RANK = int(os.getenv('RANK', -1)) @@ -137,8 +137,8 @@ def masks(self, masks, colors, img_gpu=None, alpha=0.5): masks = scale_image(masks.shape[:2], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) - s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together - masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) + s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together + masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) self.im[:] = masks * alpha + self.im * (1 - s * alpha) else: if len(masks) == 0: From 1b3bacb932322efd91fa0a8d2bd1d28829ad4e20 Mon Sep 17 00:00:00 2001 From: glennjocher Date: Sat, 10 Sep 2022 12:13:57 +0300 Subject: [PATCH 239/247] Fix CI error --- val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/val.py b/val.py index c46109545cc0..5763f49eb663 100644 --- a/val.py +++ b/val.py @@ -266,7 +266,7 @@ def run( plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred - callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, out) + callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds) # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy From 87c7c68dde33d415d9447b4665bdd9bbfae8dd1f Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 12 Sep 2022 11:23:59 +0800 Subject: [PATCH 240/247] fix predict log --- segment/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/segment/predict.py b/segment/predict.py index 7c11abebc910..24ad81774a3f 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -208,7 +208,7 @@ def run( vid_writer[i].write(im0) # Print time (inference-only) - # LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") + LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") # Print results t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image From 9d6fed15517a7a5bbb045380b3fa4416dea5349c Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 12 Sep 2022 11:24:19 +0800 Subject: [PATCH 241/247] remove upsample --- segment/train.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/segment/train.py b/segment/train.py index 36e8f153f677..b1e3648e5478 100644 --- a/segment/train.py +++ b/segment/train.py @@ -350,8 +350,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # return # Mosaic plots - if mask_ratio != 1: - masks = F.interpolate(masks[None].float(), (imgsz, imgsz), mode="bilinear", align_corners=False)[0] if plots: if ni < 3: plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg") From 5a9d410f03a8f50cb8b84c0a30a84d512fe99a62 Mon Sep 17 00:00:00 2001 From: Laughing-q <1185102784@qq.com> Date: Mon, 12 Sep 2022 11:25:00 +0800 Subject: [PATCH 242/247] update interpolate --- utils/segment/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 955faf3a36b4..b45b2c27e0a0 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -83,7 +83,7 @@ def __call__(self, preds, targets, masks): # predictions, targets, model # Mask regression if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample - masks = 
F.interpolate(masks[None], (mask_h, mask_w), mode="bilinear", align_corners=False)[0] + masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0] marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) for bi in b.unique(): From c2ea6c9b6c74a8f619bef188885fc2daaa5ab4f0 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Mon, 12 Sep 2022 13:10:15 +0530 Subject: [PATCH 243/247] fix validation plot logging --- segment/train.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/segment/train.py b/segment/train.py index b1e3648e5478..bda379176151 100644 --- a/segment/train.py +++ b/segment/train.py @@ -392,9 +392,6 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # Log val metrics and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) - if plots: - files = sorted(save_dir.glob('val*.jpg')) - logger.log_images(files, "Validation", epoch) # Save model if (not nosave) or (final_epoch and not evolve): # if save @@ -460,16 +457,16 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio # callbacks.run('on_train_end', last, best, epoch, results) # on train end callback using genericLogger - logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs + 1) + logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs) if not opt.evolve: - logger.log_model(best, epoch + 1) + logger.log_model(best, epoch) if plots: plot_results_with_masks(file=save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") logger.log_images(files, "Results", epoch + 1) - + logger.log_images(sorted(save_dir.glob('val*.jpg')), "Validation", epoch + 1) torch.cuda.empty_cache() return results From 52fbe315412e208bf5f71c4452fca71bd563064b Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 15 Sep 2022 23:47:35 +0200 Subject: [PATCH 244/247] Annotator.masks() cleanup --- segment/predict.py | 58 +++++++++++++++++++++++----------------------- utils/plots.py | 22 +++++++++--------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 24ad81774a3f..310c2222130b 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -50,34 +50,34 @@ @smart_inference_mode() def run( - weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - data=ROOT / 'data/coco128.yaml', # dataset.yaml path - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/predict-seg', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - vid_stride=1, # video frame-rate stride - retina_masks=False, + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict-seg', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images @@ -160,7 +160,7 @@ def run( # Mask plotting annotator.masks(masks, colors=[colors(x, True) for x in det[:, 5]], - img_gpu=None if retina_masks else im[i]) + im_gpu=None if retina_masks else im[i]) # Write results for *xyxy, conf, cls in reversed(det[:, :6]): diff --git a/utils/plots.py b/utils/plots.py index 103364864a08..d8d5b225a774 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -114,18 +114,18 @@ def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 2 thickness=tf, lineType=cv2.LINE_AA) - def masks(self, masks, colors, img_gpu=None, alpha=0.5): + def masks(self, masks, colors, im_gpu=None, alpha=0.5): """Plot masks at once. Args: masks (tensor): predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] - img_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] - retina_masks (bool): whether to plot masks in native resolution. 
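The im_gpu=None branch of Annotator.masks() blends on the CPU with a single matrix multiply: n masks stacked as (h, w, n) against an (n, 3) color table. A standalone numpy sketch of that blend, where random masks and colors stand in for real predictions:

    import numpy as np

    h, w, n, alpha = 480, 640, 3, 0.5
    im = np.full((h, w, 3), 114, dtype=np.uint8)                # letterbox-grey canvas
    masks = (np.random.rand(h, w, n) > 0.8).astype(np.float32)  # n binary masks
    colors = np.array([[255, 56, 56], [56, 255, 56], [56, 56, 255]], np.float32)

    s = masks.sum(2, keepdims=True).clip(0, 1)   # union of all masks, (h,w,1)
    blend = (masks @ colors).clip(0, 255)        # (h,w,n) @ (n,3) = (h,w,3)
    im[:] = blend * alpha + im * (1 - s * alpha) # recolor only where s == 1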
+ im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque """ if self.pil: # convert to numpy first self.im = np.asarray(self.im).copy() - if img_gpu is None: + if im_gpu is None: # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) if len(masks) == 0: return @@ -142,8 +142,8 @@ def masks(self, masks, colors, img_gpu=None, alpha=0.5): self.im[:] = masks * alpha + self.im * (1 - s * alpha) else: if len(masks) == 0: - self.im[:] = img_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 - colors = torch.tensor(colors, device=img_gpu.device, dtype=torch.float32) / 255.0 + self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0 colors = colors[:, None, None] # shape(n,1,1,3) masks = masks.unsqueeze(3) # shape(n,h,w,1) masks_color = masks * (colors * alpha) # shape(n,h,w,3) @@ -151,11 +151,11 @@ def masks(self, masks, colors, img_gpu=None, alpha=0.5): inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) - img_gpu = img_gpu.flip(dims=[0]) # flip channel - img_gpu = img_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) - img_gpu = img_gpu * inv_alph_masks[-1] + mcs - im_mask = (img_gpu * 255).byte().cpu().numpy() - self.im[:] = scale_image(img_gpu.shape, im_mask, self.im.shape) + im_gpu = im_gpu.flip(dims=[0]) # flip channel + im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) + im_gpu = im_gpu * inv_alph_masks[-1] + mcs + im_mask = (im_gpu * 255).byte().cpu().numpy() + self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape) if self.pil: # convert im back to PIL and update draw self.fromarray(self.im) From 6de176cc80514b9b63a2839c3c68c0e34ba5daf7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Sep 2022 21:48:03 +0000 Subject: [PATCH 245/247] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- segment/predict.py | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/segment/predict.py b/segment/predict.py index 310c2222130b..ba4cf2905255 100644 --- a/segment/predict.py +++ b/segment/predict.py @@ -50,34 +50,34 @@ @smart_inference_mode() def run( - weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) - source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam - data=ROOT / 'data/coco128.yaml', # dataset.yaml path - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device='', # cuda device, i.e. 
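The im_gpu branch is front-to-back alpha compositing: cumprod(0) accumulates the transparency of every mask drawn so far, so later masks are dimmed where earlier ones already cover a pixel. A sketch with assumed shapes; note the final * 2 offsets each mask also attenuating itself inside the cumprod, which is exact for alpha = 0.5:

    import torch

    n, h, w, alpha = 3, 480, 640, 0.5
    im_gpu = torch.rand(3, h, w)                        # image tensor, range [0, 1]
    masks = (torch.rand(n, h, w) > 0.8).float()         # n binary masks
    colors = torch.rand(n, 3)                           # colors already scaled to [0, 1]

    colors = colors[:, None, None]                      # (n,1,1,3)
    m = masks.unsqueeze(3)                              # (n,h,w,1)
    masks_color = m * (colors * alpha)                  # (n,h,w,3)
    inv_alph = (1 - m * alpha).cumprod(0)               # (n,h,w,1) remaining transparency
    mcs = (masks_color * inv_alph).sum(0) * 2           # (h,w,3) summed mask colors
    out = im_gpu.permute(1, 2, 0) * inv_alph[-1] + mcs  # background dimmed under all masks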
0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / 'runs/predict-seg', # save results to project/name - name='exp', # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - vid_stride=1, # video frame-rate stride - retina_masks=False, + weights=ROOT / 'yolov5s-seg.pt', # model.pt path(s) + source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam + data=ROOT / 'data/coco128.yaml', # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / 'runs/predict-seg', # save results to project/name + name='exp', # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride + retina_masks=False, ): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images From 4958de59c712fc4bc9caa5ea2aaeb12b3c906d13 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 15 Sep 2022 23:48:52 +0200 Subject: [PATCH 246/247] Remove segmentation_model definition --- models/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/models/common.py b/models/common.py index 8aea833388af..0d90ff4f8827 100644 --- a/models/common.py +++ b/models/common.py @@ -337,7 +337,6 @@ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() - segmentation_model = type(model.model[-1]).__name__ == 'Segment' elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata From 753adac58f00b97b1f0e58699cab36a280236469 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 15 Sep 2022 23:56:49 +0200 Subject: [PATCH 247/247] Restore 0.99999 decimals --- models/yolo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/yolo.py b/models/yolo.py index 
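The deleted segmentation_model assignment was never read inside DetectMultiBackend, but the same check remains the way to tell a segmentation checkpoint from a detection one; a hypothetical helper for illustration (Segment is the seg head module, Detect the plain box head):

    def is_segmentation_model(model):
        # the last module of a YOLOv5 model is its head: Detect or Segment
        return type(model.model[-1]).__name__ == 'Segment'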
d59034bd4041..a0702a7c0257 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -257,7 +257,7 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
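Restoring 0.99999 matters most for single-class models: the class bias is initialized to roughly log(0.6 / (nc - 1)), and the small epsilon keeps the nc = 1 case finite while pushing its initial class probability toward 1, which is correct when there is only one class to predict. A worked sketch of the arithmetic:

    import math

    def cls_bias(nc):
        # ~log(0.6 / (nc - 1)), with 0.99999 in place of 1 so nc == 1 stays finite
        return math.log(0.6 / (nc - 0.99999))

    for nc in (1, 80):
        b = cls_bias(nc)
        p = 1 / (1 + math.exp(-b))  # initial per-class probability after sigmoid
        print(f'nc={nc:>2}: bias={b:6.2f}, sigmoid={p:.4f}')
    # nc= 1: bias ~ 11.00, sigmoid ~ 1.0000
    # nc=80: bias ~ -4.88, sigmoid ~ 0.0075 (~0.6 / 79)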