diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 diff --git a/data/scripts/download_weights.sh b/data/scripts/download_weights.sh old mode 100755 new mode 100644 diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh old mode 100755 new mode 100644 diff --git a/data/scripts/get_coco128.sh b/data/scripts/get_coco128.sh old mode 100755 new mode 100644 diff --git a/data/scripts/get_imagenet.sh b/data/scripts/get_imagenet.sh old mode 100755 new mode 100644 diff --git a/detect2.py b/detect2.py new file mode 100644 index 000000000000..e54ce21e6800 --- /dev/null +++ b/detect2.py @@ -0,0 +1,334 @@ +# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license +""" +Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc. + +Usage - sources: + $ python detect.py --weights yolov5s.pt --source 0 # webcam + img.jpg # image + vid.mp4 # video + screen # screenshot + path/ # directory + list.txt # list of images + list.streams # list of streams + 'path/*.jpg' # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream + +Usage - formats: + $ python detect.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s_openvino_model # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS-only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + yolov5s_edgetpu.tflite # TensorFlow Edge TPU + yolov5s_paddle_model # PaddlePaddle +""" + +import argparse +import os +import platform +import sys +from pathlib import Path + +import torch + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from models.common import DetectMultiBackend +from utils.dataloaders import 
IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams +from utils.general import ( + LOGGER, + Profile, + check_file, + check_img_size, + check_imshow, + check_requirements, + colorstr, + cv2, + increment_path, + non_max_suppression, + print_args, + scale_boxes, + strip_optimizer, + xyxy2xywh, +) +from utils.plots import Annotator, colors, save_one_box +from utils.torch_utils import select_device, smart_inference_mode + + +@smart_inference_mode() +def run( + weights=ROOT / "yolov5s.pt", # model path or triton URL + source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam) + data=ROOT / "data/coco128.yaml", # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos (NOTE(review): not read in this function, save_img is hard-coded) + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / "runs/detect", # save results to project/name + name="exp", # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride +): + source = str(source) + save_img = True # NOTE(review): hard-coded, so nosave has no effect; the disabled expression was: not nosave and not source.endswith('.txt') + is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) + is_url = 
source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://")) + webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file) + screenshot = source.lower().startswith("screen") + if is_url and is_file: + source = check_file(source) # download + + # Classes whose bounding boxes are drawn on the output images; drawing all 3 at once can make the image hard to read + liste_classes_sauvegardées = ["Porte-Aiguille"] + # Other possible classes: "GepBox" or "Pincette" + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + device = select_device(device) + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, names, pt = model.stride, model.names, model.pt + imgsz = check_img_size(imgsz, s=stride) # check image size + + # Dataloader + bs = 1 # batch_size + if webcam: + view_img = check_imshow(warn=True) + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) + bs = len(dataset) + elif screenshot: + dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt) + else: + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) + vid_path, vid_writer = [None] * bs, [None] * bs + + # Run inference + model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup + seen, windows, dt = 0, [], (Profile(), Profile(), Profile()) + compteur = 0 # counter ("compteur"), incremented once per processed image; not read elsewhere in this function + for path, im, im0s, vid_cap, s in dataset: + with dt[0]: + im = torch.from_numpy(im).to(model.device) + im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim + + # Inference + with dt[1]: + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else 
False + pred = model(im, augment=augment, visualize=visualize) + + # NMS + with dt[2]: + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + + # Second-stage classifier (optional) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) + + # Process predictions + count = 0 + for i, det in enumerate(pred): # per image + compteur += 1 + count += 1 + seen += 1 + if webcam: # batch_size >= 1 + p, im0, frame = path[i], im0s[i].copy(), dataset.count + s += f"{i}: " + else: + p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0) + l = p.split("/")[-1].split("jpg")[0].split("_") # file name split into "_"-separated fields + # NOTE(review): expects "/"-separated paths and at least 3 "_"-separated fields; split("jpg") leaves the "." in the last field, so id_texte ends with "." for "*.jpg" names + save_name = l[0] + "_" + l[1] + "_" + l[2] + id_texte = l[-1] # last field of the file name, written as the ID column + chemin = save_name + "_coordinates.txt" # CSV path ("chemin"); a bare file name, so it is created in the current working directory + if not os.path.isfile(chemin): # write the CSV header only when the file does not exist yet + f = open(chemin, "w") + f.write("ID, x1, y1, x2, y2, Confidence, Classe\n") + f.close() + p = Path(p) # to Path + + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt (NOTE(review): not used later in this function) + s += "%gx%g " % im.shape[2:] # print string + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + if len(det): + # Rescale boxes from img_size to im0 size + det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() + + # Print results + for c in det[:, 5].unique(): + n = (det[:, 5] == c).sum() # detections per class + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + + # Write results + l_max = [0] * len(names) # per class: 1-based position in reversed(det) of the box to keep (0 = none) + count_max = 0 + for *xyxy, conf, cls in reversed(det): + count_max += 1 + if l_max[int(cls)] < float(conf.cpu().numpy()): # NOTE(review): compares a stored position (>= 1 once set) with a confidence; for confidences <= 1 only the first box met per class is ever stored - confirm this selects the intended box + l_max[int(cls)] = count_max + + count_max = 0 + for *xyxy, conf, cls in reversed(det): + count_max += 1 + if ( + save_txt and l_max[int(cls)] == count_max + ): # Write to file if box with maximum conf for 
one class + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format (NOTE(review): not used later in this function) + x1 = int(xyxy[0].item()) + y1 = int(xyxy[1].item()) + x2 = int(xyxy[2].item()) + y2 = int(xyxy[3].item()) + + confidence_score = conf # not used later in this function + conf = float(conf.cpu().numpy()) + class_index = cls # not used later in this function + object_name = names[int(cls)] + f = open(chemin, "a+") # append one CSV row: ID,x1,y1,x2,y2,confidence,class name + f.write( + id_texte + + "," + + str(x1) + + "," + + str(y1) + + "," + + str(x2) + + "," + + str(y2) + + "," + + str(conf) + + "," + + str(object_name) + + "\n" + ) + f.close() + + if ( + (save_img or save_crop or view_img) + and l_max[int(cls)] == count_max + and names[int(cls)] in liste_classes_sauvegardées + ): # Draw the box only if it is the selected box of its class and the class is listed in liste_classes_sauvegardées + c = int(cls) # integer class + label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}") + annotator.box_label(xyxy, label, color=colors(c, True)) + + count_max += 1 # NOTE(review): count_max is already incremented at the top of this loop; this extra increment looks unintended - confirm + + if save_crop: # NOTE(review): c is only assigned in the annotation branch above or by the per-class print loop - confirm it matches the current box + save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True) + # Stream results + im0 = annotator.result() + if view_img: + if platform.system() == "Linux" and p not in windows: + windows.append(p) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) + cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) + cv2.imshow(str(p), im0) + cv2.waitKey(1) # 1 millisecond + + # Save results (image with detections) + if save_img: + if dataset.mode == "image": + cv2.imwrite(save_path, im0) + else: # 'video' or 'stream' + if vid_path[i] != save_path: # new video + vid_path[i] = save_path + if isinstance(vid_writer[i], cv2.VideoWriter): + vid_writer[i].release() # release previous video writer + if vid_cap: # video + fps = vid_cap.get(cv2.CAP_PROP_FPS) + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + else: # stream + fps, w, h = 30, im0.shape[1], 
im0.shape[0] + save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos + vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + vid_writer[i].write(im0) + + # Print time (inference-only) + LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") + + # Print results + t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image + LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t) + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "" + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + if update: + strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning) + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path or triton URL") + parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)") + parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path") + parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w") + parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold") + parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold") + parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--device", default="", help="cuda device, i.e. 
0 or 0,1,2,3 or cpu") + parser.add_argument("--view-img", action="store_true", help="show results") + parser.add_argument("--save-txt", action="store_true", help="save results to *.txt") + parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels") + parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes") + parser.add_argument("--nosave", action="store_true", help="do not save images/videos") + parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3") + parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS") + parser.add_argument("--augment", action="store_true", help="augmented inference") + parser.add_argument("--visualize", action="store_true", help="visualize features") + parser.add_argument("--update", action="store_true", help="update all models") + parser.add_argument("--project", default=ROOT / "runs/detect", help="save results to project/name") + parser.add_argument("--name", default="exp", help="save results to project/name") + parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment") + parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)") + parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels") + parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences") + parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference") + parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference") + parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride") + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(exclude=("tensorboard", "thop")) + 
run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/detect3.py b/detect3.py new file mode 100644 index 000000000000..e05c624a0b17 --- /dev/null +++ b/detect3.py @@ -0,0 +1,320 @@ +# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license +""" +Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc. + +Usage - sources: + $ python detect.py --weights yolov5s.pt --source 0 # webcam + img.jpg # image + vid.mp4 # video + screen # screenshot + path/ # directory + list.txt # list of images + list.streams # list of streams + 'path/*.jpg' # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream + +Usage - formats: + $ python detect.py --weights yolov5s.pt # PyTorch + yolov5s.torchscript # TorchScript + yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s_openvino_model # OpenVINO + yolov5s.engine # TensorRT + yolov5s.mlmodel # CoreML (macOS-only) + yolov5s_saved_model # TensorFlow SavedModel + yolov5s.pb # TensorFlow GraphDef + yolov5s.tflite # TensorFlow Lite + yolov5s_edgetpu.tflite # TensorFlow Edge TPU + yolov5s_paddle_model # PaddlePaddle +""" + +import argparse +import os +import platform +import sys +from pathlib import Path + +import torch + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from models.common import DetectMultiBackend +from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams +from utils.general import ( + LOGGER, + Profile, + check_file, + check_img_size, + check_imshow, + check_requirements, + colorstr, + cv2, + increment_path, + non_max_suppression, + print_args, + scale_boxes, + strip_optimizer, + xyxy2xywh, +) +from utils.plots import Annotator, colors, save_one_box +from utils.torch_utils import 
select_device, smart_inference_mode + + +@smart_inference_mode() +def run( + weights=ROOT / "yolov5s.pt", # model path or triton URL + source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam) + data=ROOT / "data/coco128.yaml", # dataset.yaml path + imgsz=(640, 640), # inference size (height, width) + conf_thres=0.25, # confidence threshold + iou_thres=0.45, # NMS IOU threshold + max_det=1000, # maximum detections per image + device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu + view_img=False, # show results + save_txt=False, # save results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_crop=False, # save cropped prediction boxes + nosave=False, # do not save images/videos + classes=None, # filter by class: --class 0, or --class 0 2 3 + agnostic_nms=False, # class-agnostic NMS + augment=False, # augmented inference + visualize=False, # visualize features + update=False, # update all models + project=ROOT / "runs/detect", # save results to project/name + name="exp", # save results to project/name + exist_ok=False, # existing project/name ok, do not increment + line_thickness=3, # bounding box thickness (pixels) + hide_labels=False, # hide labels + hide_conf=False, # hide confidences + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + vid_stride=1, # video frame-rate stride +): + source = str(source) + save_img = not nosave and not source.endswith(".txt") # save inference images + is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) + is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://")) + webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file) + screenshot = source.lower().startswith("screen") + if is_url and is_file: + source = check_file(source) # download + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / "labels" if save_txt else 
save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + device = select_device(device) + model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) + stride, names, pt = model.stride, model.names, model.pt + imgsz = check_img_size(imgsz, s=stride) # check image size + + # Dataloader + bs = 1 # batch_size + if webcam: + view_img = check_imshow(warn=True) + dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) + bs = len(dataset) + elif screenshot: + dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt) + else: + dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) + vid_path, vid_writer = [None] * bs, [None] * bs + + # Run inference + model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup + seen, windows, dt = 0, [], (Profile(), Profile(), Profile()) + compteur = 0 # image counter ("compteur"); its value is written as the ID column of the CSV + path = "box_coordinates.txt" # CSV output file, relative to the current working directory; NOTE(review): the name path is rebound by the dataset loop + f = open(path, "w") # NOTE(review): opened without a context manager; only closed by the f.close() after the loop, so an exception inside the loop leaves it open + f.write("ID, x1, y1, x2, y2, Confidence, Classe\n") + for path, im, im0s, vid_cap, s in dataset: + with dt[0]: + im = torch.from_numpy(im).to(model.device) + im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim + + # Inference + with dt[1]: + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred = model(im, augment=augment, visualize=visualize) + + # NMS + with dt[2]: + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + + # Second-stage classifier (optional) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) + + # Process predictions + count = 0 + for i, det in enumerate(pred): # per image + compteur += 1 + count += 1 + seen += 1 + if webcam: # batch_size >= 1 + p, im0, frame = path[i], im0s[i].copy(), dataset.count + s += f"{i}: " + else: + p, im0, frame = path, 
im0s.copy(), getattr(dataset, "frame", 0) + print(p.split("/")[-1].split("jpg")[0].split("_")[-2]) # debug output of the second-to-last "_"-separated field; NOTE(review): raises IndexError when the file name contains no "_" + p = Path(p) # to Path + + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt (NOTE(review): not used later in this function) + s += "%gx%g " % im.shape[2:] # print string + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) + if len(det): + # Rescale boxes from img_size to im0 size + det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() + + # Print results + for c in det[:, 5].unique(): + n = (det[:, 5] == c).sum() # detections per class + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + + # Write results + l_max = [0] * len(names) # per class: 1-based position in reversed(det) of the box to keep (0 = none) + count_max = 0 + for *xyxy, conf, cls in reversed(det): + count_max += 1 + if l_max[int(cls)] < float(conf.cpu().numpy()): # NOTE(review): compares a stored position (>= 1 once set) with a confidence; for confidences <= 1 only the first box met per class is ever stored - confirm this selects the intended box + l_max[int(cls)] = count_max + + count_max = 0 + for *xyxy, conf, cls in reversed(det): + count_max += 1 + if ( + save_txt and l_max[int(cls)] == count_max + ): # Write to file if box with maximum conf for one class + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format (NOTE(review): not used later in this function) + x1 = int(xyxy[0].item()) + y1 = int(xyxy[1].item()) + x2 = int(xyxy[2].item()) + y2 = int(xyxy[3].item()) + + confidence_score = conf # not used later in this function + conf = float(conf.cpu().numpy()) + class_index = cls # not used later in this function + object_name = names[int(cls)] # class name, written as the last CSV column + + f.write( + str(compteur) + + "," + + str(x1) + + "," + + str(y1) + + "," + + str(x2) + + "," + + str(y2) + + "," + + str(conf) + + "," + + str(object_name) + + "\n" + ) + + if (save_img or save_crop or view_img) and l_max[ + int(cls) + ] == count_max: # Add bbox to image if box with maximum conf for one class + c = int(cls) # integer class + label = None if hide_labels else (names[c] if 
hide_conf else f"{names[c]} {conf:.2f}") + annotator.box_label(xyxy, label, color=colors(c, True)) + if save_crop: + save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True) + # Stream results + im0 = annotator.result() + if view_img: + if platform.system() == "Linux" and p not in windows: + windows.append(p) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) + cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) + cv2.imshow(str(p), im0) + cv2.waitKey(1) # 1 millisecond + + # Save results (image with detections) + if save_img: + if dataset.mode == "image": + cv2.imwrite(save_path, im0) + else: # 'video' or 'stream' + if vid_path[i] != save_path: # new video + vid_path[i] = save_path + if isinstance(vid_writer[i], cv2.VideoWriter): + vid_writer[i].release() # release previous video writer + if vid_cap: # video + fps = vid_cap.get(cv2.CAP_PROP_FPS) + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + else: # stream + fps, w, h = 30, im0.shape[1], im0.shape[0] + save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos + vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + vid_writer[i].write(im0) + + # Print time (inference-only) + LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") + + # Print results + f.close() + t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image + LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t) + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "" + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") + if update: + strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning) + + +def parse_opt(): + parser = argparse.ArgumentParser() + 
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path or triton URL") + parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)") + parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path") + parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w") + parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold") + parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold") + parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu") + parser.add_argument("--view-img", action="store_true", help="show results") + parser.add_argument("--save-txt", action="store_true", help="save results to *.txt") + parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels") + parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes") + parser.add_argument("--nosave", action="store_true", help="do not save images/videos") + parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3") + parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS") + parser.add_argument("--augment", action="store_true", help="augmented inference") + parser.add_argument("--visualize", action="store_true", help="visualize features") + parser.add_argument("--update", action="store_true", help="update all models") + parser.add_argument("--project", default=ROOT / "runs/detect", help="save results to project/name") + parser.add_argument("--name", default="exp", help="save results to project/name") + parser.add_argument("--exist-ok", action="store_true", help="existing 
project/name ok, do not increment") + parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)") + parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels") + parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences") + parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference") + parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference") + parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride") + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(vars(opt)) + return opt + + +def main(opt): + check_requirements(exclude=("tensorboard", "thop")) + run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/export.py b/export.py index dfb1c06fb5e2..f3216a564290 100644 --- a/export.py +++ b/export.py @@ -449,6 +449,7 @@ def transform_fn(data_item): Quantization transform function. Extracts and preprocess input data from dataloader item for quantization. 
+ Parameters: data_item: Tuple with data item produced by DataLoader during iteration Returns: diff --git a/utils/augmentations.py b/utils/augmentations.py index 4a6e441d7c45..bdbe07712716 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -156,7 +156,6 @@ def random_perspective( ): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] - """Applies random perspective transformation to an image, modifying the image and corresponding labels.""" height = im.shape[0] + border[0] * 2 # shape(h,w,c) width = im.shape[1] + border[1] * 2 diff --git a/utils/callbacks.py b/utils/callbacks.py index 0a0bcbdb2b96..21c587bd74c6 100644 --- a/utils/callbacks.py +++ b/utils/callbacks.py @@ -64,7 +64,6 @@ def run(self, hook, *args, thread=False, **kwargs): thread: (boolean) Run callbacks in daemon thread kwargs: Keyword Arguments to receive from YOLOv5 """ - assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" for logger in self._callbacks[hook]: if thread: diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 21308f0cedbd..bdeffec465e7 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -1104,7 +1104,8 @@ def extract_boxes(path=DATASETS_DIR / "coco128"): def autosplit(path=DATASETS_DIR / "coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False): """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files Usage: from utils.dataloaders import *; autosplit() - Arguments + + Arguments: path: Path to images directory weights: Train, val, test weights (list, tuple) annotated_only: Only use images with an annotated txt file @@ -1183,7 +1184,7 @@ class HUBDatasetStats: """ Class for generating HUB dataset JSON and `-hub` dataset directory. 
- Arguments + Arguments: path: Path to data.yaml or data.zip (with data.yaml inside data.zip) autodownload: Attempt to download dataset if not found locally @@ -1314,7 +1315,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder): """ YOLOv5 Classification Dataset. - Arguments + Arguments: root: Dataset path transform: torchvision transforms, used by default album_transform: Albumentations transforms, used if installed diff --git a/utils/general.py b/utils/general.py index e311504b3031..57db68a7ac76 100644 --- a/utils/general.py +++ b/utils/general.py @@ -518,7 +518,6 @@ def check_font(font=FONT, progress=False): def check_dataset(data, autodownload=True): """Validates and/or auto-downloads a dataset, returning its configuration as a dictionary.""" - # Download (optional) extract_dir = "" if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)): @@ -1023,7 +1022,6 @@ def non_max_suppression( Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] """ - # Checks assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 2bd8583d2ade..7051e8da0a29 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -350,7 +350,8 @@ class GenericLogger: """ YOLOv5 General purpose logger for non-task specific logging Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...) 
- Arguments + + Arguments: opt: Run arguments console_logger: Console logger include: loggers to include diff --git a/utils/loggers/clearml/clearml_utils.py b/utils/loggers/clearml/clearml_utils.py index 2b5351ef8533..de4129e08a16 100644 --- a/utils/loggers/clearml/clearml_utils.py +++ b/utils/loggers/clearml/clearml_utils.py @@ -80,7 +80,7 @@ def __init__(self, opt, hyp): - Initialize ClearML Task, this object will capture the experiment - Upload dataset version to ClearML Data if opt.upload_dataset is True - arguments: + Arguments: opt (namespace) -- Commandline arguments for this run hyp (dict) -- Hyperparameters for this run @@ -133,7 +133,7 @@ def log_scalars(self, metrics, epoch): """ Log scalars/metrics to ClearML. - arguments: + Arguments: metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...} epoch (int) iteration number for the current set of metrics """ @@ -145,7 +145,7 @@ def log_model(self, model_path, model_name, epoch=0): """ Log model weights to ClearML. - arguments: + Arguments: model_path (PosixPath or str) Path to the model weights model_name (str) Name of the model visible in ClearML epoch (int) Iteration / epoch of the model weights @@ -158,7 +158,7 @@ def log_summary(self, metrics): """ Log final metrics to a summary table. - arguments: + Arguments: metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...} """ for k, v in metrics.items(): @@ -168,7 +168,7 @@ def log_plot(self, title, plot_path): """ Log image as plot in the plot section of ClearML. - arguments: + Arguments: title (str) Title of the plot plot_path (PosixPath or str) Path to the saved image file """ @@ -183,7 +183,7 @@ def log_debug_samples(self, files, title="Debug Samples"): """ Log files (images) as debug samples in the ClearML task. 
- arguments: + Arguments: files (List(PosixPath)) a list of file paths in PosixPath format title (str) A title that groups together images with the same values """ @@ -199,7 +199,7 @@ def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_thres """ Draw the bounding boxes on a single image and report the result as a ClearML debug sample. - arguments: + Arguments: image_path (PosixPath) the path the original image file boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class] class_names (dict): dict containing mapping of class int to class name diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py index 930f2c7543af..6a32c8cc7b03 100644 --- a/utils/loggers/wandb/wandb_utils.py +++ b/utils/loggers/wandb/wandb_utils.py @@ -49,7 +49,7 @@ def __init__(self, opt, run_id=None, job_type="Training"): - Upload dataset if opt.upload_dataset is True - Setup training processes if job_type is 'Training' - arguments: + Arguments: opt (namespace) -- Commandline arguments for this run run_id (str) -- Run ID of W&B run to be resumed job_type (str) -- To set the job_type for this run @@ -90,7 +90,7 @@ def setup_training(self, opt): - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded - Setup log_dict, initialize bbox_interval - arguments: + Arguments: opt (namespace) -- commandline arguments for this run """ @@ -120,7 +120,7 @@ def log_model(self, path, opt, epoch, fitness_score, best_model=False): """ Log the model checkpoint as W&B artifact. - arguments: + Arguments: path (Path) -- Path of directory containing the checkpoints opt (namespace) -- Command line arguments for this run epoch (int) -- Current epoch number @@ -159,7 +159,7 @@ def log(self, log_dict): """ Save the metrics to the logging dictionary. 
- arguments: + Arguments: log_dict (Dict) -- metrics/media to be logged in current step """ if self.wandb_run: @@ -170,7 +170,7 @@ def end_epoch(self): """ Commit the log_dict, model artifacts and Tables to W&B and flush the log_dict. - arguments: + Arguments: best_result (boolean): Boolean representing if the result of this evaluation is best or not """ if self.wandb_run: @@ -197,7 +197,7 @@ def finish_run(self): @contextmanager def all_logging_disabled(highest_level=logging.CRITICAL): - """source - https://gist.github.com/simon-weber/7853144 + """Source - https://gist.github.com/simon-weber/7853144 A context manager that will prevent any logging messages triggered during the body from being processed. :param highest_level: the maximum logging level in use. This would only need to be changed if a custom level greater than CRITICAL is defined. diff --git a/utils/metrics.py b/utils/metrics.py index 385fdc471748..9acc38591f96 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -41,7 +41,6 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names # Returns The average precision as computed in py-faster-rcnn. """ - # Sort by objectness i = np.argsort(-conf) tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] @@ -103,7 +102,6 @@ def compute_ap(recall, precision): # Returns Average precision, precision curve, recall curve """ - # Append sentinel values to beginning and end mrec = np.concatenate(([0.0], recall, [1.0])) mpre = np.concatenate(([1.0], precision, [0.0])) @@ -137,6 +135,7 @@ def process_batch(self, detections, labels): Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: detections (Array[N, 6]), x1, y1, x2, y2, conf, class labels (Array[M, 5]), class, x1, y1, x2, y2 @@ -233,7 +232,6 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7 Input shapes are box1(1,4) to box2(n,4). 
""" - # Get the coordinates of bounding boxes if xywh: # transform from xywh to xyxy (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1) @@ -279,14 +277,15 @@ def box_iou(box1, box2, eps=1e-7): Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: box1 (Tensor[N, 4]) box2 (Tensor[M, 4]) + Returns: iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2 """ - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2) inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) @@ -304,7 +303,6 @@ def bbox_ioa(box1, box2, eps=1e-7): box2: np.array of shape(nx4) returns: np.array of shape(n) """ - # Get the coordinates of bounding boxes b1_x1, b1_y1, b1_x2, b1_y2 = box1 b2_x1, b2_y1, b2_x2, b2_y2 = box2.T diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index d7dd8aec6691..2e1dca1198b0 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -29,7 +29,6 @@ def random_perspective( ): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] - """Applies random perspective, rotation, scale, shear, and translation augmentations to an image and targets.""" height = im.shape[0] + border[0] * 2 # shape(h,w,c) width = im.shape[1] + border[1] * 2 diff --git a/utils/segment/general.py b/utils/segment/general.py index 2f65d60238dd..0793470a95e4 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -14,7 +14,6 @@ def crop_mask(masks, boxes): - masks should be a size [n, h, w] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form """ - n, h, w = masks.shape x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) r = torch.arange(w, 
device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) @@ -33,7 +32,6 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): return: h, w, n """ - c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0] # CHW @@ -51,7 +49,6 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): return: h, w, n """ - c, mh, mw = protos.shape # CHW ih, iw = shape masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW diff --git a/utils/triton.py b/utils/triton.py index 3d529ec88a07..2fee42815517 100644 --- a/utils/triton.py +++ b/utils/triton.py @@ -17,10 +17,9 @@ class TritonRemoteModel: def __init__(self, url: str): """ - Keyword arguments: + Keyword Arguments: url: Fully qualified address of the Triton server - for e.g. grpc://localhost:8000 """ - parsed_url = urlparse(url) if parsed_url.scheme == "grpc": from tritonclient.grpc import InferenceServerClient, InferInput