Skip to content

Commit

Permalink
Merge pull request #147 from tryolabs/yolov7
Browse files Browse the repository at this point in the history
Yolov7
  • Loading branch information
javiber committed Aug 24, 2022
2 parents d74dc11 + 5d48a75 commit 06e68bd
Show file tree
Hide file tree
Showing 6 changed files with 446 additions and 0 deletions.
14 changes: 14 additions & 0 deletions demos/yolov7/Dockerfile.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Demo image for running the YOLOv7 + Norfair tracking example.
# Base: slim Debian Buster with Python 3.8 preinstalled.
FROM python:3.8-slim-buster

WORKDIR /

# System packages: libgl1 / libglib2.0-0 for OpenCV, gcc for building wheels,
# git-all to clone repositories, wget to fetch model weights.
RUN apt update && \
apt install -y libgl1 sudo gcc libglib2.0-0 git-all wget

# Install YOLOv7's Python requirements straight from the upstream repository.
RUN pip3 install --upgrade pip && \
pip3 install -r https://github.com/raw/WongKinYiu/yolov7/main/requirements.txt

# Install norfair from source (master branch of the tryolabs fork).
RUN git clone https://github.com/tryolabs/norfair.git ./norfair/ && \
pip3 install ./norfair/

# run_docker.sh mounts the demo directory at /demo; start in its src folder.
WORKDIR /demo/src/
23 changes: 23 additions & 0 deletions demos/yolov7/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# YOLOv7 Example

Simplest possible example of tracking. Based on [Yolov7](https://github.com/WongKinYiu/yolov7).

## Instructions

1. Build and run the Docker container with:
```bash
./run_docker.sh
```

1. In the container, display the demo instructions:
```bash
python demo.py --help
```
   Bonus: Use additional arguments `--detector-path`, `--img-size`, `--iou-threshold`, `--conf-threshold`, `--classes`, `--track-points` as you wish.


## Explanation

This example tracks objects using a single point per detection: the centroid of the bounding boxes around cars returned by Yolov7.

![Norfair Yolov7 demo](../../docs/yolov7_cars.gif)
3 changes: 3 additions & 0 deletions demos/yolov7/run_docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Build the demo image and open an interactive shell in a container.
# Fail fast: without this, a failed build would still run a stale image.
set -e

docker build . -f Dockerfile.local -t norfair-yolov7

# --gpus all      expose host GPUs (requires nvidia-container-toolkit)
# --shm-size=1gb  extra shared memory for PyTorch dataloaders
# -v ...:/demo    mount the demo directory so outputs persist on the host.
# The substitution is quoted so paths containing spaces work.
docker run --gpus all -it --shm-size=1gb --rm -v "$(realpath .)":/demo norfair-yolov7 bash
191 changes: 191 additions & 0 deletions demos/yolov7/src/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
import argparse
from typing import List, Optional, Union
import os

import numpy as np
import torch
import torchvision.ops.boxes as bops

import norfair
from norfair import Detection, Tracker, Video, Paths

# Tracker matching thresholds: bbox mode uses 1/IoU distances (range [1, inf)),
# centroid mode uses Euclidean pixel distance.
DISTANCE_THRESHOLD_BBOX: float = 3.33
DISTANCE_THRESHOLD_CENTROID: int = 30
# Distance reported when two boxes do not overlap at all (IoU == 0).
MAX_DISTANCE: int = 10000


class YOLO:
    """Thin wrapper around a YOLOv7 model loaded through ``torch.hub``.

    Downloads the official release weights when ``model_path`` does not
    exist, then loads the model from the WongKinYiu/yolov7 hub entry.
    """

    def __init__(self, model_path: str, device: Optional[str] = None):
        """Load the YOLOv7 model.

        Args:
            model_path: Path to a ``.pt`` weights file; fetched from the
                upstream v0.1 release if missing.
            device: 'cpu', 'cuda' / 'cuda:N', or None to auto-select.

        Raises:
            Exception: if 'cuda' is requested but unavailable, or if the
                model fails to load.
        """
        if device is not None and "cuda" in device and not torch.cuda.is_available():
            raise Exception(
                "Selected device='cuda', but cuda is not available to Pytorch."
            )
        # automatically set device if its None
        elif device is None:
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # NOTE(review): `device` is validated/derived but never applied to the
        # model below — torch.hub decides placement. Confirm intended.

        # Fetch the official release weights when the file is missing.
        # NOTE(review): the os.system exit code is unchecked; a failed wget
        # only surfaces later as a model-load error.
        if not os.path.exists(model_path):
            os.system(f'wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/{os.path.basename(model_path)} -O {model_path}')

        # Load the model. Catch Exception (not a bare except, which would also
        # swallow KeyboardInterrupt/SystemExit) and chain the original cause.
        try:
            self.model = torch.hub.load('WongKinYiu/yolov7', 'custom', model_path)
        except Exception as load_error:
            raise Exception(
                "Failed to load model from {}".format(model_path)
            ) from load_error

    def __call__(
        self,
        img: Union[str, np.ndarray],
        conf_threshold: float = 0.25,
        iou_threshold: float = 0.45,
        image_size: int = 720,
        classes: Optional[List[int]] = None
    ) -> torch.tensor:
        """Run inference on a single image (path or array).

        Thresholds and the optional class filter are set on the model before
        each call; returns whatever the hub model returns for the image.
        """
        self.model.conf = conf_threshold
        self.model.iou = iou_threshold
        if classes is not None:
            self.model.classes = classes
        detections = self.model(img, size=image_size)
        return detections


def euclidean_distance(detection, tracked_object):
    """L2 distance between a detection's points and a tracked estimate.

    Used as the tracker distance function in centroid mode.
    """
    difference = detection.points - tracked_object.estimate
    return np.linalg.norm(difference)


def center(points):
    """Return the centroid of *points*, wrapped in a single-element list."""
    points_array = np.array(points)
    centroid = points_array.mean(axis=0)
    return [centroid]


def iou_pytorch(detection, tracked_object):
    """IoU-based distance via torchvision's box_iou.

    Slower but simpler version of `iou`. Flattens the two-corner point
    arrays into [x1, y1, x2, y2] boxes and computes their IoU.
    Returns 1/IoU (a value in [1, inf)), or MAX_DISTANCE when the boxes
    do not overlap (IoU == 0).
    """
    detection_points = np.concatenate([detection.points[0], detection.points[1]])
    tracked_object_points = np.concatenate(
        [tracked_object.estimate[0], tracked_object.estimate[1]]
    )

    box_a = torch.tensor([detection_points], dtype=torch.float)
    box_b = torch.tensor([tracked_object_points], dtype=torch.float)
    iou = bops.box_iou(box_a, box_b)

    # Since 0 <= IoU <= 1, we define 1/IoU as a distance.
    # Distance values will be in [1, inf).
    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin float is the drop-in replacement and also unwraps the tensor.
    return float(1 / iou if iou else MAX_DISTANCE)


def iou(detection, tracked_object):
    """1/IoU distance between a detection box and a tracked-object estimate.

    The detection's points form box A, the tracked object's estimate forms
    box B; both are two-corner arrays flattened to [x1, y1, x2, y2].
    Returns 1/IoU (in [1, inf)), or MAX_DISTANCE when the boxes do not
    overlap at all.
    """
    box_a = np.concatenate([detection.points[0], detection.points[1]])
    box_b = np.concatenate([tracked_object.estimate[0], tracked_object.estimate[1]])

    # Corners of the intersection rectangle.
    inter_left = max(box_a[0], box_b[0])
    inter_top = max(box_a[1], box_b[1])
    inter_right = min(box_a[2], box_b[2])
    inter_bottom = min(box_a[3], box_b[3])

    # Intersection area, clamped at zero when the boxes are disjoint
    # (+1 treats coordinates as inclusive pixel indices).
    inter_area = max(0, inter_right - inter_left + 1) * max(
        0, inter_bottom - inter_top + 1
    )

    # Individual box areas, same inclusive-pixel convention.
    area_a = (box_a[2] - box_a[0] + 1) * (box_a[3] - box_a[1] + 1)
    area_b = (box_b[2] - box_b[0] + 1) * (box_b[3] - box_b[1] + 1)

    # Intersection over union.
    overlap = inter_area / float(area_a + area_b - inter_area)

    # Since 0 <= IoU <= 1, 1/IoU is a distance in [1, inf).
    return 1 / overlap if overlap else MAX_DISTANCE


def yolo_detections_to_norfair_detections(
    yolo_detections: torch.tensor,
    track_points: str = "centroid"  # bbox or centroid
) -> List[Detection]:
    """Convert YOLOv7 output into a list of norfair Detections.

    In "centroid" mode each detection becomes a single (x, y) point with one
    score; in "bbox" mode it becomes a two-corner box with the score repeated
    per corner. Any other track_points value yields an empty list.
    """
    norfair_detections: List[Detection] = []

    if track_points == "centroid":
        # xywh rows are indexed as [cx, cy, ..., conf, ...] below;
        # presumably the standard YOLO hub layout — verify against the model.
        for row in yolo_detections.xywh[0]:
            centroid = np.array([row[0].item(), row[1].item()])
            scores = np.array([row[4].item()])
            norfair_detections.append(Detection(points=centroid, scores=scores))
    elif track_points == "bbox":
        # xyxy rows are indexed as [x1, y1, x2, y2, conf, ...] below.
        for row in yolo_detections.xyxy[0]:
            corners = np.array(
                [
                    [row[0].item(), row[1].item()],
                    [row[2].item(), row[3].item()],
                ]
            )
            # One score per tracked point (both corners share the confidence).
            scores = np.array([row[4].item(), row[4].item()])
            norfair_detections.append(Detection(points=corners, scores=scores))

    return norfair_detections


# ---------------------------------------------------------------------------
# Command-line entry point: run YOLOv7 detection + Norfair tracking on videos.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description="Track objects in a video.")
parser.add_argument("files", type=str, nargs="+", help="Video files to process")
parser.add_argument("--detector-path", type=str, default="/yolov7.pt", help="YOLOv7 model path")
# FIX: numeric defaults are real numbers now; the originals were strings that
# only worked because argparse re-parses string defaults through `type`.
parser.add_argument("--img-size", type=int, default=720, help="YOLOv7 inference size (pixels)")
parser.add_argument("--conf-threshold", type=float, default=0.25, help="YOLOv7 object confidence threshold")
parser.add_argument("--iou-threshold", type=float, default=0.45, help="YOLOv7 IOU threshold for NMS")
parser.add_argument("--classes", nargs="+", type=int, help="Filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--device", type=str, default=None, help="Inference device: 'cpu' or 'cuda'")
parser.add_argument("--track-points", type=str, default="centroid", help="Track points: 'centroid' or 'bbox'")
args = parser.parse_args()

model = YOLO(args.detector_path, device=args.device)

# The distance function and threshold depend only on CLI arguments, so pick
# them once here instead of once per video (behavior unchanged).
distance_function = iou if args.track_points == "bbox" else euclidean_distance
distance_threshold = (
    DISTANCE_THRESHOLD_BBOX
    if args.track_points == "bbox"
    else DISTANCE_THRESHOLD_CENTROID
)

for input_path in args.files:
    video = Video(input_path=input_path)

    # Fresh tracker and path drawer per video so no state leaks across files.
    tracker = Tracker(
        distance_function=distance_function,
        distance_threshold=distance_threshold,
    )
    paths_drawer = Paths(center, attenuation=0.01)

    for frame in video:
        yolo_detections = model(
            frame,
            conf_threshold=args.conf_threshold,
            iou_threshold=args.iou_threshold,
            image_size=args.img_size,
            classes=args.classes
        )
        detections = yolo_detections_to_norfair_detections(
            yolo_detections, track_points=args.track_points
        )
        tracked_objects = tracker.update(detections=detections)
        # Draw points or boxes to match the tracking representation.
        if args.track_points == "centroid":
            norfair.draw_points(frame, detections)
            norfair.draw_tracked_objects(frame, tracked_objects)
        elif args.track_points == "bbox":
            norfair.draw_boxes(frame, detections)
            norfair.draw_tracked_boxes(frame, tracked_objects)
        frame = paths_drawer.draw(frame, tracked_objects)
        video.write(frame)
Loading

0 comments on commit 06e68bd

Please sign in to comment.